diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index f4d17b3596..bb4e2d24c9 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - name: Prometheus Community Support url: https://prometheus.io/community/ diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 3f6cf76e16..bf7f681b69 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,8 +12,8 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 632d38cb00..669305ebd3 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,8 +12,8 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b634131b0a..2714211dd7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,13 +12,9 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. image: quay.io/prometheus/golang-builder:1.23-base - env: - # Preliminary fix to make Go tests with race detector not use too much memory, - # see https://github.com/prometheus/prometheus/issues/14858. - GOMEMLIMIT: 10GiB steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/setup_environment with: enable_npm: true @@ -33,8 +29,8 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - run: GOARCH=386 go test ./cmd/prometheus @@ -52,7 +48,7 @@ jobs: # The go version in this image should be N-1 wrt test_go. 
image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - run: make build # Don't run NPM build; don't run race-detector. - run: make test GO_ONLY=1 test-flags="" @@ -66,8 +62,8 @@ jobs: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -83,7 +79,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: go-version: 1.23.x @@ -100,7 +96,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - run: go install ./cmd/promtool/. - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -125,8 +121,8 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -150,8 +146,8 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. 
steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -173,7 +169,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: @@ -186,7 +182,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: @@ -212,8 +208,8 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -229,8 +225,8 @@ jobs: || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - uses: ./.github/promci/actions/publish_release with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -244,10 +240,10 @@ jobs: needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 - name: Install nodejs - uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 + uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 with: node-version-file: "web/ui/.nvmrc" registry-url: "https://registry.npmjs.org" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 89aa2ba29b..77fbd4dafb 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,15 +24,15 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Initialize CodeQL - uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 + uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 with: languages: ${{ matrix.language }} - name: Autobuild - uses: 
github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 + uses: github/codeql-action/autobuild@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 + uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index 8ddbc34aeb..144859486d 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -18,7 +18,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -40,7 +40,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index f3953cb2a4..80356e45bf 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index 537e9abd84..aa306c46d0 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 82cccb2bc1..c63727f7f1 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # tag=v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # tag=v4.2.0 with: persist-credentials: false @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # tag=v4.3.4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # tag=v4.4.0 with: name: SARIF file path: results.sarif @@ -45,6 +45,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # tag=v3.26.6 + uses: github/codeql-action/upload-sarif@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # tag=v3.26.10 with: sarif_file: results.sarif diff --git a/.golangci.yml b/.golangci.yml index 303cd33d8b..c512101e1b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -23,6 +23,7 @@ linters: - usestdlibvars - whitespace - loggercheck + - sloglint issues: max-issues-per-linter: 0 @@ -100,8 +101,6 @@ linters-settings: - (net/http.ResponseWriter).Write # No need to check for errors on server's shutdown. - (*net/http.Server).Shutdown - # Never check for logger errors. - - (github.com/go-kit/log.Logger).Log # Never check for rollback errors as Rollback() is called when a previous error was detected. - (github.com/prometheus/prometheus/storage.Appender).Rollback goimports: @@ -153,14 +152,4 @@ linters-settings: disable: - float-compare - go-require - enable: - - bool-compare - - compares - - empty - - error-is-as - - error-nil - - expected-actual - - len - - require-error - - suite-dont-use-pkg - - suite-extra-assert-call + enable-all: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f0daf72f3..cdfed5ba52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,48 @@ # Changelog +## unreleased + +* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 +* [CHANGE] Remote-write: default enable_http2 to false. #15219 +* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 +* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178 +* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 +* [CHANGE] Disallow configuring AM with the v1 api. #13883 +* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 +* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 +* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 +* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251 + +## 3.0.0-beta.1 / 2024-10-09 + +* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 +* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930 +* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 +* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 +* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906 +* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion' is enabled. 
#14738 +* [CHANGE] Float literals and time durations being the same is now a stable fetaure. #15111 +* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096 +* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082 +* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 +* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 +* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 +* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929 +* [ENHANCEMENT] Consul SD: Support catalog filters. #11224 +* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 +* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932 +* [PERF] TSDB: Grow postings by doubling. #14721 +* [PERF] Relabeling: Optimize adding a constant label pair. #12180 +* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095 +* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910 +* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854 +* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884 +* [BUGFIX] PromQL: Unary negation of native histograms. #14821 +* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025 +* [BUGFIX] Autoreload: Reload invalid yaml files. #14947 + ## 3.0.0-beta.0 / 2024-09-05 Release 3.0.0-beta.0 includes new features such as a brand new UI and UTF-8 support enabled by default. As a new major version, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. Most users should be able to try this release out of the box without any configuration changes. @@ -16,32 +59,43 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747 * [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526 * [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 -* [FEATURE] Promtool: Allow additional labels to be added to blocks created from openmetrics. #14402 -* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200 -* [FEATURE] Automatic reloading of the Prometheus configuration file at a specified interval #14769 -* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706 -* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 -* [ENHANCEMENT] Scrape: Only parse created timestamp if `created-timestamp-zero-ingestion` feature flag is enabled. This is as a lot of memory is used when parsing the created timestamp in the OM text format. #14815 -* [ENHANCEMENT] Scrape: Add support for logging scrape failures to a specified file. 
#14734 -* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 -* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 * [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 -* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls.#14816 -* [ENHANCEMENT] Add support for multiple listening addresses. #14665 -* [ENHANCEMENT] Add the ability to set custom HTTP headers. #14817 -* [BUGFIX] TSDB: Fix shard initialization after WAL repair. #14731 -* [BUGFIX] UTF-8: Ensure correct validation when legacy mode turned on. #14736 -* [BUGFIX] SD: Make discovery manager notify consumers of dropped targets for still defined jobs. #13147 -* [BUGFIX] SD: Prevent the new service discovery manager from storing stale targets. #13622 -* [BUGFIX] Remote Write 2.0: Ensure metadata records are sent from the WAL to remote write during WAL replay. #14766 -* [BUGFIX] Scrape: Do no override target parameter labels with config params. #11029 -* [BUGFIX] Scrape: Reset exemplar position when scraping histograms in protobuf. #14810 -* [BUGFIX] Native Histograms: Do not re-use spans between histograms. #14771 -* [BUGFIX] Scrape: Only parse created timestamp if `created-timestamp-zero-ingestion` feature flag is enabled. This is as a lot of memory is used when parsing the created timestamp in the OM text format. #14815 -* [BUGFIX] TSDB: Fix panic in query during truncation with OOO head. #14831 -* [BUGFIX] TSDB: Fix panic in chunk querier. #14874 -* [BUGFIX] promql.Engine.Close: No-op if nil. #14861 -* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042 +* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 +* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 + +## 2.55.0 / 2024-10-22 + +* [FEATURE] PromQL: Add experimental `info` function. #14495 +* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 +* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 +* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 +* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 +* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200 +* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 +* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 +* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 +* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532 +* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 +* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 +* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 +* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 +* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. 
This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
+* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when the sorting label is equal. #14655, #14985
+* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
+* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
+* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
+* [ENHANCEMENT] API: Support multiple listening addresses. #14665
+* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
+* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
+* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
+* [BUGFIX] PromQL: Make `sort_by_label` stable. #14985
+* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
+* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
+* [BUGFIX] Scraping: Exemplars could be dropped in protobuf scraping. #14810
+* [BUGFIX] Remote-Write: Fix metadata sending for experimental Remote-Write V2. #14766
+* [BUGFIX] Remote-Write: Return 4xx rather than 5xx when a timeseries has a duplicate label. #14716
+* [BUGFIX] Experimental Native Histograms: Many fixes for incorrect results, panics, and warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
+* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042
 
 ## 2.54.1 / 2024-08-27
 
@@ -140,7 +194,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
 * [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
 * [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
 * [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
-* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
+* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
 * [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
 * [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
 * [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@@ -677,7 +731,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
 ## 2.33.0 / 2022-01-29
 
-* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
+* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
 * [CHANGE] Web: Promote remote-write-receiver to stable. #10119
 * [FEATURE] Config: Add `stripPort` template function. #10002
 * [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@@ -914,7 +968,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
 * [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
 * [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
 * [BUGFIX] TSDB: Do not panic when writing very large records to the WAL.
#8790 -* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723 +* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723 * [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737 * [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766 @@ -1840,7 +1894,7 @@ information, read the announcement blog post and migration guide. ## 1.7.0 / 2017-06-06 * [CHANGE] Compress remote storage requests and responses with unframed/raw snappy. -* [CHANGE] Properly ellide secrets in config. +* [CHANGE] Properly elide secrets in config. * [FEATURE] Add OpenStack service discovery. * [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces. * [FEATURE] Add metric for discovered number of Alertmanagers. diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 3661ddaa0a..de3f3c73b7 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -2,7 +2,6 @@ General maintainers: * Bryan Boreham (bjboreham@gmail.com / @bboreham) -* Levi Harrison (levi@leviharrison.dev / @LeviHarrison) * Ayoub Mrini (ayoubmrini424@gmail.com / @machine424) * Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie) @@ -13,13 +12,12 @@ Maintainers for specific parts of the codebase: * `k8s`: Frederic Branczyk ( / @brancz) * `documentation` * `prometheus-mixin`: Matthias Loibl ( / @metalmatze) -* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), +* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), George Krajcsovits ( / @krajorama) * `storage` * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank) - * `otlptranslator`: Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) + * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez) - * `agent`: Robert Fratto ( / @rfratto) * `web` * `ui`: Julius Volz ( / @juliusv) * `module`: Augustin Husson ( @nexucis) diff --git a/Makefile b/Makefile index cf55c29628..0b5935de00 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,11 @@ include Makefile.common DOCKER_IMAGE_NAME ?= prometheus +# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set +ifdef PREBUILT_ASSETS_STATIC_DIR + SKIP_UI_BUILD = true +endif + .PHONY: update-npm-deps update-npm-deps: @echo ">> updating npm dependencies" @@ -75,8 +80,24 @@ ui-lint: cd $(UI_PATH)/react-app && npm run lint .PHONY: assets +ifndef SKIP_UI_BUILD assets: ui-install ui-build +.PHONY: npm_licenses +npm_licenses: ui-install + @echo ">> bundling npm licenses" + rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses + ln -s . 
npm_licenses + find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=- + rm -f npm_licenses +else +assets: + @echo '>> skipping assets build, pre-built assets provided' + +npm_licenses: + @echo '>> skipping assets npm licenses, pre-built assets provided' +endif + .PHONY: assets-compress assets-compress: assets @echo '>> compressing assets' @@ -125,14 +146,6 @@ else test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version endif -.PHONY: npm_licenses -npm_licenses: ui-install - @echo ">> bundling npm licenses" - rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses - ln -s . npm_licenses - find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=- - rm -f npm_licenses - .PHONY: tarball tarball: npm_licenses common-tarball diff --git a/Makefile.common b/Makefile.common index 34d65bb56d..cbb5d86382 100644 --- a/Makefile.common +++ b/Makefile.common @@ -275,3 +275,9 @@ $(1)_precheck: exit 1; \ fi endef + +govulncheck: install-govulncheck + govulncheck ./... + +install-govulncheck: + command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest diff --git a/README.md b/README.md index df974e1097..7528147b0e 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ The Makefile provides several targets: Prometheus is bundled with many service discovery plugins. When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml) -file to disable some service discoveries. The file is a yaml-formated list of go +file to disable some service discoveries. The file is a yaml-formatted list of go import path that will be built into the Prometheus binary. After you have changed the file, you diff --git a/RELEASE.md b/RELEASE.md index 53fdc44337..8e78a6a3ec 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -59,6 +59,7 @@ Release cadence of first pre-releases being cut is 6 weeks. | v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) | | v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) | | v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) | +| v2.55 | 2024-09-17 | Bryan Boreham (GitHub: @bboreham) | If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. diff --git a/VERSION b/VERSION index 7e9b524994..1941d52827 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-beta.0 +3.0.0-beta.1 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 7bd51054e3..8fb6d4d38e 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -18,11 +18,11 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "math/bits" "net" "net/http" - _ "net/http/pprof" // Comment this line to disable pprof endpoint. 
"net/url" "os" "os/signal" @@ -38,8 +38,6 @@ import ( "github.com/KimMachineGun/automemlimit/memlimit" "github.com/alecthomas/kingpin/v2" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/mwitkow/go-conntrack" "github.com/oklog/run" @@ -47,8 +45,8 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - promlogflag "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + promslogflag "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" toolkit_web "github.com/prometheus/exporter-toolkit/web" "go.uber.org/atomic" @@ -77,10 +75,50 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/documentcli" "github.com/prometheus/prometheus/util/logging" + "github.com/prometheus/prometheus/util/notifications" prom_runtime "github.com/prometheus/prometheus/util/runtime" "github.com/prometheus/prometheus/web" ) +// klogv1OutputCallDepth is the stack depth where we can find the origin of this call. +const klogv1OutputCallDepth = 6 + +// klogv1DefaultPrefixLength is the length of the log prefix that we have to strip out. +const klogv1DefaultPrefixLength = 53 + +// klogv1Writer is used in SetOutputBySeverity call below to redirect any calls +// to klogv1 to end up in klogv2. +// This is a hack to support klogv1 without use of go-kit/log. It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +type klogv1Writer struct{} + +// Write redirects klogv1 calls to klogv2. +// This is a hack to support klogv1 without use of go-kit/log. 
It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +func (kw klogv1Writer) Write(p []byte) (n int, err error) { + if len(p) < klogv1DefaultPrefixLength { + klogv2.InfoDepth(klogv1OutputCallDepth, string(p)) + return len(p), nil + } + + switch p[0] { + case 'I': + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'W': + klogv2.WarningDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'E': + klogv2.ErrorDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'F': + klogv2.FatalDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + default: + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + } + + return len(p), nil +} + var ( appName = "prometheus" @@ -135,24 +173,25 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla type flagConfig struct { configFile string - agentStoragePath string - serverStoragePath string - notifier notifier.Options - forGracePeriod model.Duration - outageTolerance model.Duration - resendDelay model.Duration - maxConcurrentEvals int64 - web web.Options - scrape scrape.Options - tsdb tsdbOptions - agent agentOptions - lookbackDelta model.Duration - webTimeout model.Duration - queryTimeout model.Duration - queryConcurrency int - queryMaxSamples int - RemoteFlushDeadline model.Duration - nameEscapingScheme string + agentStoragePath string + serverStoragePath string + notifier notifier.Options + forGracePeriod model.Duration + outageTolerance model.Duration + resendDelay model.Duration + maxConcurrentEvals int64 + web web.Options + scrape scrape.Options + tsdb tsdbOptions + agent agentOptions + lookbackDelta model.Duration + webTimeout model.Duration + queryTimeout model.Duration + queryConcurrency int + queryMaxSamples int + RemoteFlushDeadline model.Duration + nameEscapingScheme string + maxNotificationsSubscribers int enableAutoReload bool autoReloadInterval model.Duration @@ -161,94 +200,87 @@ type flagConfig struct { memlimitRatio float64 // These options are extracted from featureList // for ease of use. - enableExpandExternalLabels bool - enablePerStepStats bool - enableAutoGOMAXPROCS bool - enableAutoGOMEMLIMIT bool - enableConcurrentRuleEval bool + enablePerStepStats bool + enableAutoGOMAXPROCS bool + enableAutoGOMEMLIMIT bool + enableConcurrentRuleEval bool prometheusURL string corsRegexString string - promlogConfig promlog.Config - promqlEnableDelayedNameRemoval bool + + promslogConfig promslog.Config } // setFeatureListOptions sets the corresponding options from the featureList. 
-func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
+func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
 	for _, f := range c.featureList {
 		opts := strings.Split(f, ",")
 		for _, o := range opts {
 			switch o {
-			case "otlp-write-receiver":
-				c.web.EnableOTLPWriteReceiver = true
-				level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
-			case "expand-external-labels":
-				c.enableExpandExternalLabels = true
-				level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
 			case "exemplar-storage":
 				c.tsdb.EnableExemplarStorage = true
-				level.Info(logger).Log("msg", "Experimental in-memory exemplar storage enabled")
+				logger.Info("Experimental in-memory exemplar storage enabled")
 			case "memory-snapshot-on-shutdown":
 				c.tsdb.EnableMemorySnapshotOnShutdown = true
-				level.Info(logger).Log("msg", "Experimental memory snapshot on shutdown enabled")
+				logger.Info("Experimental memory snapshot on shutdown enabled")
 			case "extra-scrape-metrics":
 				c.scrape.ExtraMetrics = true
-				level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled")
+				logger.Info("Experimental additional scrape metrics enabled")
 			case "metadata-wal-records":
 				c.scrape.AppendMetadata = true
-				level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0")
+				logger.Info("Experimental metadata records in WAL enabled, required for remote write 2.0")
 			case "promql-per-step-stats":
 				c.enablePerStepStats = true
-				level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
+				logger.Info("Experimental per-step statistics reporting")
 			case "auto-gomaxprocs":
 				c.enableAutoGOMAXPROCS = true
-				level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
+				logger.Info("Automatically set GOMAXPROCS to match Linux container CPU quota")
 			case "auto-reload-config":
 				c.enableAutoReload = true
 				if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
 					c.autoReloadInterval, _ = model.ParseDuration("1s")
 				}
-				level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval))
+				logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes.", "interval", c.autoReloadInterval)
 			case "auto-gomemlimit":
 				c.enableAutoGOMEMLIMIT = true
-				level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
+				logger.Info("Automatically set GOMEMLIMIT to match Linux container or system memory limit")
 			case "concurrent-rule-eval":
 				c.enableConcurrentRuleEval = true
-				level.Info(logger).Log("msg", "Experimental concurrent rule evaluation enabled.")
-			case "no-default-scrape-port":
-				c.scrape.NoDefaultPort = true
-				level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
+				logger.Info("Experimental concurrent rule evaluation enabled.")
 			case "promql-experimental-functions":
 				parser.EnableExperimentalFunctions = true
-				level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
+				logger.Info("Experimental PromQL functions enabled.")
 			case "native-histograms":
 				c.tsdb.EnableNativeHistograms = true
 				c.scrape.EnableNativeHistogramsIngestion = true
 				// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols - level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + logger.Info("Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + case "ooo-native-histograms": + c.tsdb.EnableOOONativeHistograms = true + logger.Info("Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true") case "created-timestamp-zero-ingestion": c.scrape.EnableCreatedTimestampZeroIngestion = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols - level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "delayed-compaction": c.tsdb.EnableDelayedCompaction = true - level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.") + logger.Info("Experimental delayed compaction is enabled.") case "promql-delayed-name-removal": c.promqlEnableDelayedNameRemoval = true - level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.") + logger.Info("Experimental PromQL delayed name removal enabled.") case "": continue case "old-ui": c.web.UseOldUI = true - level.Info(logger).Log("msg", "Serving previous version of the Prometheus web UI.") + logger.Info("Serving previous version of the Prometheus web UI.") default: - level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o) + logger.Warn("Unknown option for --enable-feature", "option", o) } } } @@ -282,7 +314,7 @@ func main() { Registerer: prometheus.DefaultRegisterer, Gatherer: prometheus.DefaultGatherer, }, - promlogConfig: promlog.Config{}, + promslogConfig: promslog.Config{}, } a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout) @@ -315,6 +347,9 @@ func main() { a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners."). Default("512").IntVar(&cfg.web.MaxConnections) + a.Flag("web.max-notifications-subscribers", "Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close."). + Default("16").IntVar(&cfg.maxNotificationsSubscribers) + a.Flag("web.external-url", "The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. 
If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically.").
 		PlaceHolder("").StringVar(&cfg.prometheusURL)
 
@@ -341,6 +376,9 @@ func main() {
 	a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())).
 		Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs))
 
+	a.Flag("web.enable-otlp-receiver", "Enable API endpoint accepting OTLP write requests.").
+		Default("false").BoolVar(&cfg.web.EnableOTLPWriteReceiver)
+
 	a.Flag("web.console.templates", "Path to the console template directory, available at /consoles.").
 		Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath)
 
@@ -380,6 +418,9 @@ func main() {
 	serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
 		Default("false").BoolVar(&cfg.tsdb.NoLockfile)
 
+	serverOnlyFlag(a, "storage.tsdb.allow-overlapping-compaction", "Allow compaction of overlapping blocks. If set to false, TSDB stops vertical compaction and leaves overlapping blocks there. The use case is to let another component handle the compaction of overlapping blocks.").
+		Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction)
+
 	serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
 		Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
 
@@ -392,6 +433,9 @@ func main() {
 	serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk.").
 		Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk)
 
+	serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled.").
+		Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent)
+
 	agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
 		Default("data-agent/").StringVar(&cfg.agentStoragePath)
 
@@ -471,12 +515,12 @@ func main() {
 	a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
 		Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
 
-	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
+	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList) a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) - promlogflag.AddFlags(a, &cfg.promlogConfig) + promslogflag.AddFlags(a, &cfg.promslogConfig) a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error { if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil { @@ -494,7 +538,13 @@ func main() { os.Exit(2) } - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) + slog.SetDefault(logger) + + notifs := notifications.NewNotifications(cfg.maxNotificationsSubscribers, prometheus.DefaultRegisterer) + cfg.web.NotificationsSub = notifs.Sub + cfg.web.NotificationsGetter = notifs.Get + notifs.AddNotification(notifications.StartingUp) if err := cfg.setFeatureListOptions(logger); err != nil { fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing feature list: %w", err)) @@ -544,12 +594,12 @@ func main() { // Throw error for invalid config before starting other components. var cfgFile *config.Config - if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil { + if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { absPath, pathErr := filepath.Abs(cfg.configFile) if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if _, err := cfgFile.GetScrapeConfigs(); err != nil { @@ -557,7 +607,7 @@ func main() { if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if cfg.tsdb.EnableExemplarStorage { @@ -590,7 +640,7 @@ func main() { if !agentMode { if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration - level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. @@ -600,7 +650,7 @@ func main() { panic(err) } cfg.tsdb.RetentionDuration = y - level.Warn(logger).Log("msg", "Time retention value is too high. Limiting to: "+y.String()) + logger.Warn("Time retention value is too high. Limiting to: " + y.String()) } // Max block size settings. @@ -616,16 +666,19 @@ func main() { cfg.tsdb.MaxBlockDuration = maxBlockDuration } + + // Delayed compaction checks + if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) { + logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. 
Using default", "default", tsdb.DefaultCompactionDelayMaxPercent) + cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent + } } noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{} noStepSubqueryInterval.Set(config.DefaultGlobalConfig.EvaluationInterval) - // Above level 6, the k8s client would log bearer tokens in clear-text. - klog.ClampLevel(6) - klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) - klogv2.ClampLevel(6) - klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime")) + klogv2.SetSlogLogger(logger.With("component", "k8s_client_runtime")) + klog.SetOutputBySeverity("INFO", klogv1Writer{}) modeAppName := "Prometheus Server" mode := "server" @@ -634,20 +687,22 @@ func main() { mode = "agent" } - level.Info(logger).Log("msg", "Starting "+modeAppName, "mode", mode, "version", version.Info()) + logger.Info("Starting "+modeAppName, "mode", mode, "version", version.Info()) if bits.UintSize < 64 { - level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) + logger.Warn("This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) } - level.Info(logger).Log("build_context", version.BuildContext()) - level.Info(logger).Log("host_details", prom_runtime.Uname()) - level.Info(logger).Log("fd_limits", prom_runtime.FdLimits()) - level.Info(logger).Log("vm_limits", prom_runtime.VMLimits()) + logger.Info("operational information", + "build_context", version.BuildContext(), + "host_details", prom_runtime.Uname(), + "fd_limits", prom_runtime.FdLimits(), + "vm_limits", prom_runtime.VMLimits(), + ) var ( localStorage = &readyStorage{stats: tsdb.NewDBStats()} scraper = &readyScrapeManager{} - remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) + remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) @@ -655,7 +710,7 @@ func main() { ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() - notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) + notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier")) ctxScrape, cancelScrape = context.WithCancel(context.Background()) ctxNotify, cancelNotify = context.WithCancel(context.Background()) @@ -670,37 +725,37 @@ func main() { // they are not specific to an SD instance. 
		err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
 		if err != nil {
-			level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
+			logger.Error("failed to register Kubernetes client metrics", "err", err)
 			os.Exit(1)
 		}
 
 		sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer)
 		if err != nil {
-			level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err)
+			logger.Error("failed to register service discovery metrics", "err", err)
 			os.Exit(1)
 		}
 
-		discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
+		discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
 		if discoveryManagerScrape == nil {
-			level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
+			logger.Error("failed to create a discovery manager scrape")
 			os.Exit(1)
 		}
 
-		discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
+		discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
 		if discoveryManagerNotify == nil {
-			level.Error(logger).Log("msg", "failed to create a discovery manager notify")
+			logger.Error("failed to create a discovery manager notify")
 			os.Exit(1)
 		}
 
 	scrapeManager, err := scrape.NewManager(
 		&cfg.scrape,
-		log.With(logger, "component", "scrape manager"),
-		func(s string) (log.Logger, error) { return logging.NewJSONFileLogger(s) },
+		logger.With("component", "scrape manager"),
+		logging.NewJSONFileLogger,
 		fanoutStorage,
 		prometheus.DefaultRegisterer,
 	)
 	if err != nil {
-		level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
+		logger.Error("failed to create a scrape manager", "err", err)
 		os.Exit(1)
 	}
 
@@ -713,10 +768,10 @@ func main() {
 
 	if cfg.enableAutoGOMAXPROCS {
 		l := func(format string, a ...interface{}) {
-			level.Info(logger).Log("component", "automaxprocs", "msg", fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...))
+			logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...), "component", "automaxprocs")
 		}
 		if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil {
-			level.Warn(logger).Log("component", "automaxprocs", "msg", "Failed to set GOMAXPROCS automatically", "err", err)
+			logger.Warn("Failed to set GOMAXPROCS automatically", "component", "automaxprocs", "err", err)
 		}
 	}
 
@@ -730,17 +785,17 @@ func main() {
 			),
 		),
 		); err != nil {
-			level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err)
+			logger.Warn("Failed to set GOMEMLIMIT automatically", "component", "automemlimit", "err", err)
 		}
 	}
 
 	if !agentMode {
 		opts := promql.EngineOpts{
-			Logger:             log.With(logger, "component", "query engine"),
+			Logger:             logger.With("component", "query engine"),
 			Reg:                prometheus.DefaultRegisterer,
 			MaxSamples:         cfg.queryMaxSamples,
 			Timeout:            time.Duration(cfg.queryTimeout),
-			ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")),
+			ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, logger.With("component", "activeQueryTracker")),
 			LookbackDelta:      time.Duration(cfg.lookbackDelta),
 			NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
 			// EnableAtModifier and EnableNegativeOffset have to be
@@ -761,7 +816,7 @@
 			Context:         ctxRule,
 			ExternalURL:     cfg.web.ExternalURL,
 			Registerer:      prometheus.DefaultRegisterer,
-			Logger:          log.With(logger, "component", "rule manager"),
+			Logger:          logger.With("component", "rule manager"),
 			OutageTolerance: time.Duration(cfg.outageTolerance),
 			ForGracePeriod:  time.Duration(cfg.forGracePeriod),
 			ResendDelay:     time.Duration(cfg.resendDelay),
@@ -812,7 +867,7 @@ func main() {
 	}
 
 	// Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager.
-	webHandler := web.New(log.With(logger, "component", "web"), &cfg.web)
+	webHandler := web.New(logger.With("component", "web"), &cfg.web)
 
 	// Monitor outgoing connections on default transport with conntrack.
 	http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc(
@@ -939,18 +994,18 @@ func main() {
 
 	listeners, err := webHandler.Listeners()
 	if err != nil {
-		level.Error(logger).Log("msg", "Unable to start web listeners", "err", err)
+		logger.Error("Unable to start web listeners", "err", err)
 		if err := queryEngine.Close(); err != nil {
-			level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
+			logger.Warn("Closing query engine failed", "err", err)
 		}
 		os.Exit(1)
 	}
 
 	err = toolkit_web.Validate(*webConfig)
 	if err != nil {
-		level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err)
+		logger.Error("Unable to validate web configuration file", "err", err)
 		if err := queryEngine.Close(); err != nil {
-			level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
+			logger.Warn("Closing query engine failed", "err", err)
 		}
 		os.Exit(1)
 	}
@@ -966,21 +1021,22 @@ func main() {
 		// Don't forget to release the reloadReady channel so that waiting blocks can exit normally.
select { case sig := <-term: - level.Warn(logger).Log("msg", "Received an OS signal, exiting gracefully...", "signal", sig.String()) + logger.Warn("Received an OS signal, exiting gracefully...", "signal", sig.String()) reloadReady.Close() case <-webHandler.Quit(): - level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...") + logger.Warn("Received termination request via web service, exiting gracefully...") case <-cancel: reloadReady.Close() } if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } return nil }, func(err error) { close(cancel) - webHandler.SetReady(false) + webHandler.SetReady(web.Stopping) + notifs.AddNotification(notifications.ShuttingDown) }, ) } @@ -989,11 +1045,11 @@ func main() { g.Add( func() error { err := discoveryManagerScrape.Run() - level.Info(logger).Log("msg", "Scrape discovery manager stopped") + logger.Info("Scrape discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping scrape discovery manager...") + logger.Info("Stopping scrape discovery manager...") cancelScrape() }, ) @@ -1003,11 +1059,11 @@ func main() { g.Add( func() error { err := discoveryManagerNotify.Run() - level.Info(logger).Log("msg", "Notify discovery manager stopped") + logger.Info("Notify discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping notify discovery manager...") + logger.Info("Stopping notify discovery manager...") cancelNotify() }, ) @@ -1036,7 +1092,7 @@ func main() { <-reloadReady.C err := scrapeManager.Run(discoveryManagerScrape.SyncCh()) - level.Info(logger).Log("msg", "Scrape manager stopped") + logger.Info("Scrape manager stopped") return err }, func(err error) { @@ -1044,7 +1100,7 @@ func main() { // so that it doesn't try to write samples to a closed storage. // We should also wait for rule manager to be fully stopped to ensure // we don't trigger any false positive alerts for rules using absent(). 
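The `g.Add` calls in these hunks follow the actor pattern of oklog/run, which Prometheus' main function is built on: each actor pairs an execute function with an interrupt function, and once any actor returns, every other actor's interrupt is invoked. A minimal self-contained sketch, assuming the github.com/oklog/run module:

    package main

    import (
    	"context"
    	"fmt"

    	"github.com/oklog/run"
    )

    func main() {
    	var g run.Group
    	ctx, cancel := context.WithCancel(context.Background())

    	// A long-lived actor: execute blocks until interrupted.
    	g.Add(
    		func() error {
    			<-ctx.Done()
    			fmt.Println("long-lived actor stopped")
    			return nil
    		},
    		func(error) { cancel() }, // interrupt: unblock execute
    	)

    	// A short-lived actor; its return triggers every interrupt above.
    	g.Add(
    		func() error {
    			fmt.Println("short-lived actor done")
    			return nil
    		},
    		func(error) {},
    	)

    	// Run returns the error of the first actor to exit.
    	fmt.Println(g.Run())
    }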
- level.Info(logger).Log("msg", "Stopping scrape manager...") + logger.Info("Stopping scrape manager...") scrapeManager.Stop() }, ) @@ -1075,10 +1131,18 @@ func main() { if cfg.enableAutoReload { checksum, err = config.GenerateChecksum(cfg.configFile) if err != nil { - level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err) + logger.Error("Failed to generate initial checksum for configuration file", "err", err) } } + callback := func(success bool) { + if success { + notifs.DeleteNotification(notifications.ConfigurationUnsuccessful) + return + } + notifs.AddNotification(notifications.ConfigurationUnsuccessful) + } + g.Add( func() error { <-reloadReady.C @@ -1086,18 +1150,18 @@ func main() { for { select { case <-hup: - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) } else if cfg.enableAutoReload { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { checksum = currentChecksum } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) + logger.Error("Failed to generate checksum during configuration reload", "err", err) } } case rc := <-webHandler.Reload(): - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) rc <- err } else { rc <- nil @@ -1105,7 +1169,7 @@ func main() { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { checksum = currentChecksum } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) + logger.Error("Failed to generate checksum during configuration reload", "err", err) } } } @@ -1115,16 +1179,14 @@ func main() { } currentChecksum, err := config.GenerateChecksum(cfg.configFile) if err != nil { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) + logger.Error("Failed to generate checksum during configuration reload", "err", err) + } else if currentChecksum == checksum { continue } - if currentChecksum == checksum { - continue - } - level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.") + logger.Info("Configuration file change detected, reloading the configuration.") - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) } else { checksum = currentChecksum } @@ -1153,14 +1215,15 @@ func main() { return nil } - if err := reloadConfig(cfg.configFile, 
cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err) } reloadReady.Close() - webHandler.SetReady(true) - level.Info(logger).Log("msg", "Server is ready to receive web requests.") + webHandler.SetReady(web.Ready) + notifs.DeleteNotification(notifications.StartingUp) + logger.Info("Server is ready to receive web requests.") <-cancel return nil }, @@ -1175,7 +1238,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting TSDB ...") + logger.Info("Starting TSDB ...") if cfg.tsdb.WALSegmentSize != 0 { if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB") @@ -1194,13 +1257,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: - level.Info(logger).Log("fs_type", fsType) + logger.Info("filesystem information", "fs_type", fsType) } - level.Info(logger).Log("msg", "TSDB started") - level.Debug(logger).Log("msg", "TSDB options", + logger.Info("TSDB started") + logger.Debug("TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, @@ -1219,7 +1282,7 @@ func main() { }, func(err error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1231,7 +1294,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting WAL storage ...") + logger.Info("Starting WAL storage ...") if cfg.agent.WALSegmentSize != 0 { if cfg.agent.WALSegmentSize < 10*1024*1024 || cfg.agent.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.agent.wal-segment-size' must be set between 10MB and 256MB") @@ -1250,13 +1313,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
+ logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: - level.Info(logger).Log("fs_type", fsType) + logger.Info("filesystem information", "fs_type", fsType) } - level.Info(logger).Log("msg", "Agent WAL storage started") - level.Debug(logger).Log("msg", "Agent WAL storage options", + logger.Info("Agent WAL storage started") + logger.Debug("Agent WAL storage options", "WALSegmentSize", cfg.agent.WALSegmentSize, "WALCompression", cfg.agent.WALCompression, "StripeSize", cfg.agent.StripeSize, @@ -1274,7 +1337,7 @@ func main() { }, func(e error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1308,7 +1371,7 @@ func main() { <-reloadReady.C notifierManager.Run(discoveryManagerNotify.SyncCh()) - level.Info(logger).Log("msg", "Notifier manager stopped") + logger.Info("Notifier manager stopped") return nil }, func(err error) { @@ -1317,16 +1380,16 @@ func main() { ) } if err := g.Run(); err != nil { - level.Error(logger).Log("err", err) + logger.Error("Error running goroutines from run.Group", "err", err) os.Exit(1) } - level.Info(logger).Log("msg", "See you next time!") + logger.Info("See you next time!") } -func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { +func openDBWithMetrics(dir string, logger *slog.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { db, err := tsdb.Open( dir, - log.With(logger, "component", "tsdb"), + logger.With("component", "tsdb"), reg, opts, stats, @@ -1379,21 +1442,23 @@ type reloader struct { reloader func(*config.Config) error } -func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) { +func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { start := time.Now() - timings := []interface{}{} - level.Info(logger).Log("msg", "Loading configuration file", "filename", filename) + timingsLogger := logger + logger.Info("Loading configuration file", "filename", filename) defer func() { if err == nil { configSuccess.Set(1) configSuccessTime.SetToCurrentTime() + callback(true) } else { configSuccess.Set(0) + callback(false) } }() - conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger) + conf, err := config.LoadFile(filename, agentMode, logger) if err != nil { return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err) } @@ -1408,10 +1473,10 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b for _, rl := range rls { rstart := time.Now() if err := rl.reloader(conf); err != nil { - level.Error(logger).Log("msg", "Failed to apply configuration", "err", err) + logger.Error("Failed to apply configuration", "err", err) failed = true } - timings = append(timings, rl.name, time.Since(rstart)) + timingsLogger = timingsLogger.With(rl.name, time.Since(rstart)) } if failed { return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) @@ -1419,7 +1484,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b oldGoGC =
debug.SetGCPercent(conf.Runtime.GoGC) if oldGoGC != conf.Runtime.GoGC { - level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) + logger.Info("updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) } // Write the new setting out to the ENV var for runtime API output. if conf.Runtime.GoGC >= 0 { @@ -1429,8 +1494,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b } noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) - l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} - level.Info(logger).Log(append(l, timings...)...) + timingsLogger.Info("Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)) return nil } @@ -1584,6 +1648,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender { type notReadyAppender struct{} +// SetOptions does nothing in this appender implementation. +func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {} + func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1596,6 +1663,10 @@ func (n notReadyAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels return 0, tsdb.ErrNotReady } +func (n notReadyAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, tsdb.ErrNotReady +} + func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1734,7 +1805,9 @@ type tsdbOptions struct { EnableMemorySnapshotOnShutdown bool EnableNativeHistograms bool EnableDelayedCompaction bool + CompactionDelayMaxPercent int EnableOverlappingCompaction bool + EnableOOONativeHistograms bool } func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { @@ -1754,8 +1827,10 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { MaxExemplars: opts.MaxExemplars, EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown, EnableNativeHistograms: opts.EnableNativeHistograms, + EnableOOONativeHistograms: opts.EnableOOONativeHistograms, OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, EnableDelayedCompaction: opts.EnableDelayedCompaction, + CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, } } diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index c16864cb8c..4bd1c71b2d 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -31,9 +31,9 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -125,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() fakeInputFile := "fake-input-file" expectedExitStatus := 2 @@ -211,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { - prom := exec.Command(promPath, "-test.main", 
"--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "9MB", + exitCode: 1, + }, + { + size: "257MB", + exitCode: 1, + }, + { + size: "10", + exitCode: 2, + }, + { + size: "1GB", + exitCode: 1, + }, + { + size: "12MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. + stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { - t.Parallel() - if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "512KB", + exitCode: 1, + }, + { + size: "1MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. 
+ stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } @@ -295,7 +338,7 @@ func TestTimeMetrics(t *testing.T) { tmpDir := t.TempDir() reg := prometheus.NewRegistry() - db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil) + db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) @@ -353,6 +396,8 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) require.NoError(t, prom.Start()) @@ -371,6 +416,8 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} @@ -398,6 +445,8 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) @@ -419,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() testcases := []struct { mode string @@ -433,6 +483,7 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { @@ -484,6 +535,8 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -508,6 +561,8 @@ func TestDocumentation(t *testing.T) { } func TestRwProtoMsgFlagParser(t *testing.T) { + t.Parallel() + defaultOpts := config.RemoteWriteProtoMsgs{ config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2, } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 2011fb123f..94eec27e79 100644 --- 
a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index 62e317bf8b..25abf5e965 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -125,12 +125,61 @@ func (p *queryLogTest) query(t *testing.T) { require.NoError(t, err) require.Equal(t, 200, r.StatusCode) case ruleOrigin: - time.Sleep(2 * time.Second) + // Poll the /api/v1/rules endpoint until a new rule evaluation is detected. + var lastEvalTime time.Time + for { + r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port)) + require.NoError(t, err) + + rulesBody, err := io.ReadAll(r.Body) + require.NoError(t, err) + defer r.Body.Close() + + // Parse the rules response to find the last evaluation time. + newEvalTime := parseLastEvaluation(rulesBody) + if newEvalTime.After(lastEvalTime) { + if !lastEvalTime.IsZero() { + break + } + lastEvalTime = newEvalTime + } + + time.Sleep(100 * time.Millisecond) + } default: panic("can't query this origin") } } +// parseLastEvaluation extracts the last evaluation timestamp from the /api/v1/rules response. +func parseLastEvaluation(rulesBody []byte) time.Time { + var ruleResponse struct { + Status string `json:"status"` + Data struct { + Groups []struct { + Rules []struct { + LastEvaluation string `json:"lastEvaluation"` + } `json:"rules"` + } `json:"groups"` + } `json:"data"` + } + + err := json.Unmarshal(rulesBody, &ruleResponse) + if err != nil { + return time.Time{} + } + + for _, group := range ruleResponse.Data.Groups { + for _, rule := range group.Rules { + if evalTime, err := time.Parse(time.RFC3339Nano, rule.LastEvaluation); err == nil { + return evalTime + } + } + } + + return time.Time{} +} + // queryString returns the expected queryString of this test.
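The query_log_test.go change above replaces a fixed two-second sleep with polling of /api/v1/rules until the lastEvaluation timestamp advances, removing a flaky timing assumption. The same idea expressed as a generic stdlib-only helper (a sketch; the names are illustrative, not from the diff):

    package main

    import (
    	"errors"
    	"time"
    )

    // waitFor polls cond at the given interval until it returns true or the
    // timeout elapses, bounding the wait instead of hard-coding a sleep.
    func waitFor(cond func() bool, timeout, interval time.Duration) error {
    	deadline := time.Now().Add(timeout)
    	for time.Now().Before(deadline) {
    		if cond() {
    			return nil
    		}
    		time.Sleep(interval)
    	}
    	return errors.New("condition not met before deadline")
    }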
func (p *queryLogTest) queryString() string { switch p.origin { @@ -322,7 +371,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) qc = len(ql) @@ -353,7 +402,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -393,6 +442,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine { file, err := os.Open(path) require.NoError(t, err) defer file.Close() + scanner := bufio.NewScanner(file) for scanner.Scan() { var q queryLogLine @@ -406,6 +456,7 @@ func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() cwd, err := os.Getwd() require.NoError(t, err) @@ -424,6 +475,7 @@ func TestQueryLog(t *testing.T) { } t.Run(p.String(), func(t *testing.T) { + t.Parallel() p.run(t) }) } diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 16491f0416..1408975df9 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -21,9 +21,10 @@ import ( "math" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/tsdb" @@ -120,7 +121,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. - w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("block writer: %w", err) } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 4d4cf6c5db..49676ee5c4 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -32,13 +32,13 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/google/pprof/profile" "github.com/prometheus/client_golang/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil/promlint" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/exporter-toolkit/web" "gopkg.in/yaml.v2" @@ -58,6 +58,7 @@ import ( _ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) @@ -216,6 +217,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" @@ -291,7 +293,7 @@ func main() { promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String() promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String() - featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings() + featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Currently unused.").Default("").Strings() documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden() @@ -321,24 +323,21 @@ func main() { } } - var noDefaultScrapePort bool for _, f := range *featureList { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "no-default-scrape-port": - noDefaultScrapePort = true case "": continue default: - fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o) + fmt.Printf(" WARNING: --enable-feature is currently a no-op\n") } } } switch parsedCmd { case sdCheckCmd.FullCommand(): - os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) + os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) @@ -394,6 +393,7 @@ func main() { }, *testRulesRun, *testRulesDiff, + *testRulesDebug, *testRulesFiles...), ) @@ -578,7 +578,7 @@ func checkFileExists(fn string) error { func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { fmt.Println("Checking", filename) - cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger()) + cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { return nil, err } @@ -898,30 +898,30 @@ func compare(a, b compareRuleType) int { func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - var rules compareRuleTypes + var cRules compareRuleTypes for _, group := range groups { for _, rule := range group.Rules { - rules = append(rules, compareRuleType{ + cRules = append(cRules, compareRuleType{ metric: ruleMetric(rule), - label: labels.FromMap(rule.Labels), + label: rules.FromMaps(group.Labels, rule.Labels), }) } } - if len(rules) < 2 { + if len(cRules) < 2 { return duplicates } - sort.Sort(rules) + sort.Sort(cRules) - last := rules[0] - for i := 1; i < len(rules); i++ { - if compare(last, rules[i]) == 0 { + last := cRules[0] + for i := 1; i < len(cRules); i++ { + if compare(last, cRules[i]) == 0 { // Don't add a duplicated rule multiple times.
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 { - duplicates = append(duplicates, rules[i]) + duplicates = append(duplicates, cRules[i]) } } - last = rules[i] + last = cRules[i] } return duplicates @@ -1185,7 +1185,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu return fmt.Errorf("new api client error: %w", err) } - ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api) + ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api) errs := ruleImporter.loadGroups(ctx, files) for _, err := range errs { if err != nil { @@ -1219,7 +1219,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c lb := labels.NewBuilder(labels.EmptyLabels()) for _, tg := range targetGroups { var failures []error - targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb) + targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb) if len(failures) > 0 { first := failures[0] return first diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 9d891c32fd..698e6641d1 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -146,7 +146,7 @@ func TestCheckSDFile(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkSDFile(test.file) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -228,7 +228,7 @@ func TestCheckTargetConfig(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -315,7 +315,7 @@ func TestCheckConfigSyntax(t *testing.T) { expectedErrMsg = test.errWindows } if expectedErrMsg != "" { - require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -345,7 +345,7 @@ func TestAuthorizationConfig(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { - require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error()) + require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error()) return } require.NoError(t, err) diff --git a/cmd/promtool/rules.go b/cmd/promtool/rules.go index 5a18644842..adb214b812 100644 --- a/cmd/promtool/rules.go +++ b/cmd/promtool/rules.go @@ -16,12 +16,12 @@ package main import ( "context" "fmt" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -38,7 +38,7 @@ type queryRangeAPI interface { } type ruleImporter struct { - logger log.Logger + logger *slog.Logger config ruleImporterConfig apiClient queryRangeAPI @@ -57,8 +57,8 @@ type ruleImporterConfig struct { // newRuleImporter creates a new rule 
importer that can be used to parse and evaluate recording rule files and create new series // written to disk in blocks. -func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { - level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) +func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { + logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) return &ruleImporter{ logger: logger, config: config, @@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) // importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks. func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) { for name, group := range importer.groups { - level.Info(importer.logger).Log("backfiller", "processing group", "name", name) + importer.logger.Info("processing group", "component", "backfiller", "name", name) for i, r := range group.Rules() { - level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name()) + importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name()) if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil { errs = append(errs, err) } @@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName return fmt.Errorf("query range: %w", err) } if warnings != nil { - level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings) + importer.logger.Warn("Range query returned warnings.", "warnings", warnings) } // To prevent races with compaction, a block writer only allows appending samples @@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. 
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("new block writer: %w", err) } diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go index d55fb0c896..94e28e570d 100644 --- a/cmd/promtool/rules_test.go +++ b/cmd/promtool/rules_test.go @@ -21,9 +21,9 @@ import ( "testing" "time" - "github.com/go-kit/log" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -161,7 +161,7 @@ func TestBackfillRuleIntegration(t *testing.T) { } func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() cfg := ruleImporterConfig{ outputDir: tmpDir, start: start.Add(-10 * time.Hour), diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index e65262d439..5e005bca8b 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -20,9 +20,9 @@ import ( "os" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -38,10 +38,10 @@ type sdCheckResult struct { } // CheckSD performs service discovery for the given job name and reports the results. -func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) +func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int { + logger := promslog.New(&promslog.Config{}) - cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) + cfg, err := config.LoadFile(sdConfigFiles, false, logger) if err != nil { fmt.Fprintln(os.Stderr, "Cannot load config", err) return failureExitCode @@ -114,7 +114,7 @@ outerLoop: } results := []sdCheckResult{} for _, tgs := range sdCheckResults { - results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...) + results = append(results, getSDCheckResult(tgs, scrapeConfig)...) 
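As the comments in the backfill and rule-import hunks above explain, both paths open their block writer with 2*blockDuration so that samples can be appended anywhere in the original interval without racing compaction of a just-written block. A minimal sketch of that call, with the output directory as a placeholder:

    package main

    import (
    	"fmt"

    	"github.com/prometheus/common/promslog"
    	"github.com/prometheus/prometheus/tsdb"
    )

    func main() {
    	blockDuration := tsdb.DefaultBlockDuration // two hours, in milliseconds
    	// Doubling the block range keeps appends anywhere in the original
    	// interval from racing with compaction.
    	w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), "./out", 2*blockDuration)
    	if err != nil {
    		fmt.Println("block writer:", err)
    		return
    	}
    	_ = w // append samples and flush; omitted in this sketch
    }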
} res, err := json.MarshalIndent(results, "", " ") @@ -127,7 +127,7 @@ outerLoop: return successExitCode } -func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult { +func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult { sdCheckResults := []sdCheckResult{} lb := labels.NewBuilder(labels.EmptyLabels()) for _, targetGroup := range targetGroups { @@ -144,7 +144,7 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc } } - res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort) + res, orig, err := scrape.PopulateLabels(lb, scrapeConfig) result := sdCheckResult{ DiscoveredLabels: orig, Labels: res, diff --git a/cmd/promtool/sd_test.go b/cmd/promtool/sd_test.go index cb65ee72aa..44d8084651 100644 --- a/cmd/promtool/sd_test.go +++ b/cmd/promtool/sd_test.go @@ -70,5 +70,5 @@ func TestSDCheckResult(t *testing.T) { }, } - testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true)) + testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig)) } diff --git a/cmd/promtool/testdata/config_with_service_discovery_files.yml b/cmd/promtool/testdata/config_with_service_discovery_files.yml index 13b6d7faff..6a550a8403 100644 --- a/cmd/promtool/testdata/config_with_service_discovery_files.yml +++ b/cmd/promtool/testdata/config_with_service_discovery_files.yml @@ -6,7 +6,7 @@ scrape_configs: alerting: alertmanagers: - scheme: http - api_version: v1 + api_version: v2 file_sd_configs: - files: - nonexistent_file.yml diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml index 7ba890f4a7..e2a8230902 100644 --- a/cmd/promtool/testdata/unittest.yml +++ b/cmd/promtool/testdata/unittest.yml @@ -69,13 +69,13 @@ tests: eval_time: 2m exp_samples: - labels: "test_histogram_repeat" - histogram: "{{count:2 sum:3 buckets:[2]}}" + histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}" - expr: test_histogram_increase eval_time: 2m exp_samples: - labels: "test_histogram_increase" - histogram: "{{count:4 sum:5.6 buckets:[4]}}" + histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}" # Ensure a value is stale as soon as it is marked as such. 
- expr: test_stale diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 971ea8ab00..727275aa6b 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "runtime" @@ -32,9 +33,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "go.uber.org/atomic" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" @@ -60,7 +62,7 @@ type writeBenchmark struct { memprof *os.File blockprof *os.File mtxprof *os.File - logger log.Logger + logger *slog.Logger } func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error { @@ -68,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err outPath: outPath, samplesFile: samplesFile, numMetrics: numMetrics, - logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), + logger: promslog.New(&promslog.Config{}), } if b.outPath == "" { dir, err := os.MkdirTemp("", "tsdb_bench") @@ -87,9 +89,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err dir := filepath.Join(b.outPath, "storage") - l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) - - st, err := tsdb.Open(dir, l, nil, &tsdb.Options{ + st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{ RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), MinBlockDuration: int64(2 * time.Hour / time.Millisecond), }, tsdb.NewDBStats()) @@ -367,25 +367,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) { fmt.Fprintf(tw, "%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n", meta.ULID, - getFormatedTime(meta.MinTime, humanReadable), - getFormatedTime(meta.MaxTime, humanReadable), + getFormattedTime(meta.MinTime, humanReadable), + getFormattedTime(meta.MaxTime, humanReadable), time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond, meta.Stats.NumSamples, meta.Stats.NumChunks, meta.Stats.NumSeries, - getFormatedBytes(b.Size(), humanReadable), + getFormattedBytes(b.Size(), humanReadable), ) } } -func getFormatedTime(timestamp int64, humanReadable bool) string { +func getFormattedTime(timestamp int64, humanReadable bool) string { if humanReadable { return time.Unix(timestamp/1000, 0).UTC().String() } return strconv.FormatInt(timestamp, 10) } -func getFormatedBytes(bytes int64, humanReadable bool) string { +func getFormattedBytes(bytes int64, humanReadable bool) string { if humanReadable { return units.Base2Bytes(bytes).String() } diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 7030635d1c..78dacdc569 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -26,13 +26,13 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/grafana/regexp" "github.com/nsf/jsondiff" "gopkg.in/yaml.v2" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...) 
+func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) } -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,14 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { + if debug { + testStart := time.Now() + fmt.Printf("DEBUG: Starting test %s\n", testname) + defer func() { + fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart)) + }() + } // Setup testing suite. suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { @@ -218,7 +225,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i Appendable: suite.Storage(), Context: context.Background(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } m := rules.NewManager(opts) groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...) @@ -482,6 +489,32 @@ Outer: } } + if debug { + ts := tg.maxEvalTime() + // Potentially a test can be specified at a time with fractional seconds, + // which PromQL cannot represent, so round up to the next whole second. 
+ ts = (ts + time.Second).Truncate(time.Second) + expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts) + q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts)) + if err != nil { + fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err) + return errs + } + res := q.Exec(suite.Context()) + if res.Err != nil { + fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err) + return errs + } + switch v := res.Value.(type) { + case promql.Matrix: + fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts) + fmt.Println(v.String()) + default: + fmt.Printf("DEBUG: Got unexpected type %T\n", v) + return errs + } + } + if len(errs) > 0 { return errs } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 9bbac28e9f..9b73dcdc1c 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -141,14 +141,14 @@ func TestRulesUnitTest(t *testing.T) { reuseCount[tt.want] += len(tt.args.files) } t.Run(tt.name, func(t *testing.T) { - if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) } t.Run("Junit xml output ", func(t *testing.T) { var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -230,7 +230,7 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...) + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) require.Equal(t, tt.want, got) }) } diff --git a/config/config.go b/config/config.go index 6dcb461026..30a74e0402 100644 --- a/config/config.go +++ b/config/config.go @@ -16,6 +16,8 @@ package config import ( "errors" "fmt" + "log/slog" + "mime" "net/url" "os" "path/filepath" @@ -25,8 +27,6 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -73,7 +73,7 @@ const ( ) // Load parses the YAML input s into a Config. -func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func Load(s string, logger *slog.Logger) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. 
We thus have to set the DefaultConfig at the entry @@ -85,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro return nil, err } - if !expandExternalLabels { - return cfg, nil - } - b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -98,26 +94,28 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro if v := os.Getenv(s); v != "" { return v } - level.Warn(logger).Log("msg", "Empty environment variable", "name", s) + logger.Warn("Empty environment variable", "name", s) return "" }) if newV != v.Value { - level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV) + logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV) } // Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024 b.Add(v.Name, newV) }) - cfg.GlobalConfig.ExternalLabels = b.Labels() + if !b.Labels().IsEmpty() { + cfg.GlobalConfig.ExternalLabels = b.Labels() + } return cfg, nil } // LoadFile parses the given YAML file into a Config. -func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) { content, err := os.ReadFile(filename) if err != nil { return nil, err } - cfg, err := Load(string(content), expandExternalLabels, logger) + cfg, err := Load(string(content), logger) if err != nil { return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err) } @@ -166,13 +164,13 @@ var ( // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. - ScrapeClassicHistograms: false, - MetricsPath: "/metrics", - Scheme: "http", - HonorLabels: false, - HonorTimestamps: true, - HTTPClientConfig: config.DefaultHTTPClientConfig, - EnableCompression: true, + AlwaysScrapeClassicHistograms: false, + MetricsPath: "/metrics", + Scheme: "http", + HonorLabels: false, + HonorTimestamps: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -183,13 +181,18 @@ var ( HTTPClientConfig: config.DefaultHTTPClientConfig, } + DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: false, + } + // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), ProtobufMessage: RemoteWriteProtoMsgV1, QueueConfig: DefaultQueueConfig, MetadataConfig: DefaultMetadataConfig, - HTTPClientConfig: config.DefaultHTTPClientConfig, + HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig, } // DefaultQueueConfig is the default remote queue configuration. @@ -476,9 +479,22 @@ func (s ScrapeProtocol) Validate() error { return nil } +// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol. 
+func (s ScrapeProtocol) HeaderMediaType() string { + if _, ok := ScrapeProtocolsHeaders[s]; !ok { + return "" + } + mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s]) + if err != nil { + return "" + } + return mediaType +} + var ( PrometheusProto ScrapeProtocol = "PrometheusProto" PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4" + PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0" OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1" OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0" UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8 @@ -486,6 +502,7 @@ var ( ScrapeProtocolsHeaders = map[ScrapeProtocol]string{ PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", PrometheusText0_0_4: "text/plain;version=0.0.4", + PrometheusText1_0_0: "text/plain;version=1.0.0;escaping=allow-utf-8", OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1", OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } @@ -495,6 +512,7 @@ var ( DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } @@ -506,6 +524,7 @@ var ( PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } ) @@ -632,10 +651,17 @@ type ScrapeConfig struct { // The protocols to negotiate during a scrape. It tells clients what // protocol are accepted by Prometheus and with what preference (most wanted is first). // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, - // OpenMetricsText1.0.0, PrometheusText0.0.4. + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` - // Whether to scrape a classic histogram that is also exposed as a native histogram. - ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"` + // The fallback protocol to use if the Content-Type provided by the target + // is not provided, blank, or not one of the expected values. + // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. + ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"` + // Whether to scrape a classic histogram, even if it is also exposed as a native histogram. + AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"` + // Whether to convert all scraped classic histograms into a native histogram with custom buckets. + ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"` // File to which scrape failures are logged. ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The HTTP resource path on which to fetch metrics from targets. 
@@ -783,6 +809,12 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName) } + if c.ScrapeFallbackProtocol != "" { + if err := c.ScrapeFallbackProtocol.Validate(); err != nil { + return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err) + } + } + switch globalConfig.MetricNameValidationScheme { case LegacyValidationConfig: case "", UTF8ValidationConfig: @@ -958,6 +990,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. +// 'v1' is no longer supported. type AlertmanagerAPIVersion string // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -987,7 +1020,7 @@ const ( ) var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ - AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, + AlertmanagerAPIVersionV2, } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. diff --git a/config/config_test.go b/config/config_test.go index 66377f6879..c3148f93a7 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -24,10 +24,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -142,7 +142,7 @@ var expectedConf = &Config{ }, }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, }, { @@ -158,7 +158,7 @@ var expectedConf = &Config{ KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, Headers: map[string]string{"name": "value"}, }, @@ -206,19 +206,20 @@ var expectedConf = &Config{ { JobName: "prometheus", - HonorLabels: true, - HonorTimestamps: true, - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EnableCompression: true, - BodySizeLimit: globBodySizeLimit, - SampleLimit: globSampleLimit, - TargetLimit: globTargetLimit, - LabelLimit: globLabelLimit, - LabelNameLengthLimit: globLabelNameLengthLimit, - LabelValueLengthLimit: globLabelValueLengthLimit, - ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, - ScrapeFailureLogFile: "testdata/fail_prom.log", + HonorLabels: true, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, + BodySizeLimit: globBodySizeLimit, + SampleLimit: globSampleLimit, + TargetLimit: globTargetLimit, + LabelLimit: globLabelLimit, + LabelNameLengthLimit: globLabelNameLengthLimit, + LabelValueLengthLimit: globLabelValueLengthLimit, + ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFallbackProtocol: PrometheusText0_0_4, + ScrapeFailureLogFile: "testdata/fail_prom.log", MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1500,8 +1501,13 @@ var expectedConf = &Config{ }, } +func TestYAMLNotLongerSupportedAMApi(t *testing.T) { + _, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger()) + require.Error(t, err) +} + func TestYAMLRoundtrip(t *testing.T) { - want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger()) + want, err := 
LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1514,7 +1520,7 @@ func TestYAMLRoundtrip(t *testing.T) { } func TestRemoteWriteRetryOnRateLimit(t *testing.T) { - want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1529,7 +1535,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) { func TestOTLPSanitizeResourceAttributes(t *testing.T) { t.Run("good config", func(t *testing.T) { - want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, log.NewNopLogger()) + want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1541,7 +1547,7 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) t.Run("bad config", func(t *testing.T) { - _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, log.NewNopLogger()) + _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger()) require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`) require.ErrorContains(t, err, `empty promoted OTel resource attribute`) }) @@ -1550,16 +1556,17 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. - _, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) + require.NoError(t, err) require.Equal(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { - c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.ScrapeConfigs, 1) for _, sc := range c.ScrapeConfigs { @@ -1569,7 +1576,7 @@ func TestScrapeIntervalLarger(t *testing.T) { // YAML marshaling must not reveal authentication credentials. 
func TestElideSecrets(t *testing.T) { - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|<secret>`) @@ -1586,31 +1593,31 @@ func TestElideSecrets(t *testing.T) { func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets rule files with an absolute path - c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger()) + c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger()) require.NoError(t, err) require.Equal(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesWithKubeConfig(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesSelectors(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } @@ -2080,12 +2087,20 @@ var expectedErrors = []struct { }, { filename: "scrape_config_files_scrape_protocols.bad.yml", - errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`, + errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`, }, { filename: "scrape_config_files_scrape_protocols2.bad.yml", errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`, }, + { + filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml", + errMsg: `parsing YAML file
testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`, + }, + { + filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml", + errMsg: `unmarshal errors`, + }, } func TestBadConfigs(t *testing.T) { @@ -2094,9 +2109,8 @@ func TestBadConfigs(t *testing.T) { model.NameValidationScheme = model.UTF8Validation }() for _, ee := range expectedErrors { - _, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger()) - require.Error(t, err, "%s", ee.filename) - require.Contains(t, err.Error(), ee.errMsg, + _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger()) + require.ErrorContains(t, err, ee.errMsg, "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } } @@ -2126,7 +2140,7 @@ func TestBadStaticConfigsYML(t *testing.T) { } func TestEmptyConfig(t *testing.T) { - c, err := Load("", false, log.NewNopLogger()) + c, err := Load("", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig require.Equal(t, exp, *c) @@ -2136,38 +2150,34 @@ func TestExpandExternalLabels(t *testing.T) { // Clean up any TEST env variable that could exist on the system. os.Setenv("TEST", "") - c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger()) - require.NoError(t, err) - testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels) - - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) os.Setenv("TEST", "TestValue") - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) } func TestAgentMode(t *testing.T) { - _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger()) + _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field rule_files is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field
remote_read is not allowed in agent mode") - c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger()) + c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Empty(t, c.RemoteWriteConfigs) - c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger()) + c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.RemoteWriteConfigs, 1) require.Equal( @@ -2178,7 +2188,7 @@ func TestAgentMode(t *testing.T) { } func TestEmptyGlobalBlock(t *testing.T) { - c, err := Load("global:\n", false, log.NewNopLogger()) + c, err := Load("global:\n", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig exp.Runtime = DefaultRuntimeConfig @@ -2333,7 +2343,7 @@ func TestGetScrapeConfigs(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger()) + c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger()) require.NoError(t, err) scfgs, err := c.GetScrapeConfigs() @@ -2351,7 +2361,7 @@ func kubernetesSDHostURL() config.URL { } func TestScrapeConfigDisableCompression(t *testing.T) { - want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2398,7 +2408,7 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger()) + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2411,3 +2421,54 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { }) } } + +func TestScrapeProtocolHeader(t *testing.T) { + tests := []struct { + name string + proto ScrapeProtocol + expectedValue string + }{ + { + name: "blank", + proto: ScrapeProtocol(""), + expectedValue: "", + }, + { + name: "invalid", + proto: ScrapeProtocol("invalid"), + expectedValue: "", + }, + { + name: "prometheus protobuf", + proto: PrometheusProto, + expectedValue: "application/vnd.google.protobuf", + }, + { + name: "prometheus text 0.0.4", + proto: PrometheusText0_0_4, + expectedValue: "text/plain", + }, + { + name: "prometheus text 1.0.0", + proto: PrometheusText1_0_0, + expectedValue: "text/plain", + }, + { + name: "openmetrics 0.0.1", + proto: OpenMetricsText0_0_1, + expectedValue: "application/openmetrics-text", + }, + { + name: "openmetrics 1.0.0", + proto: OpenMetricsText1_0_0, + expectedValue: "application/openmetrics-text", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mediaType := tc.proto.HeaderMediaType() + + require.Equal(t, tc.expectedValue, mediaType) + }) + } +} diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 9eb7995432..2501652d5b 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -74,6 +74,8 @@ scrape_configs: # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
+ fallback_scrape_protocol: PrometheusText0.0.4 + scrape_failure_log_file: fail_prom.log file_sd_configs: - files: diff --git a/config/testdata/config_with_deprecated_am_api_config.yml b/config/testdata/config_with_deprecated_am_api_config.yml new file mode 100644 index 0000000000..ac89537ff1 --- /dev/null +++ b/config/testdata/config_with_deprecated_am_api_config.yml @@ -0,0 +1,7 @@ +alerting: + alertmanagers: + - scheme: http + api_version: v1 + file_sd_configs: + - files: + - nonexistent_file.yml diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml new file mode 100644 index 0000000000..07cfe47594 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: "prometheusproto" + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml new file mode 100644 index 0000000000..c5d133f9c4 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"] + static_configs: + - targets: ['localhost:8080'] diff --git a/discovery/README.md b/discovery/README.md index 4c06608625..d5418e7fb1 100644 --- a/discovery/README.md +++ b/discovery/README.md @@ -233,7 +233,7 @@ type Config interface { } type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger // A registerer for the Discoverer's metrics. Registerer prometheus.Registerer diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index a44912481a..5a725cb48f 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,11 +30,11 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // the Discoverer interface. type EC2Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *EC2SDConfig - ec2 *ec2.EC2 + ec2 ec2iface.EC2API // azToAZID maps this account's availability zones to their underlying AZ // ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so @@ -157,14 +158,14 @@ type EC2Discovery struct { } // NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets. 
-func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { +func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { m, ok := metrics.(*ec2Metrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &EC2Discovery{ logger: logger, @@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis return d, nil } -func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) { +func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) { if d.ec2 != nil { return d.ec2, nil } @@ -254,8 +255,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error // Prometheus requires a reload if AWS adds a new AZ to the region. if d.azToAZID == nil { if err := d.refreshAZIDs(ctx); err != nil { - level.Debug(d.logger).Log( - "msg", "Unable to describe availability zones", + d.logger.Debug( + "Unable to describe availability zones", "err", err) } } @@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone] if !ok && d.azToAZID != nil { - level.Debug(d.logger).Log( - "msg", "Availability zone ID not found", + d.logger.Debug( + "Availability zone ID not found", "az", *inst.Placement.AvailabilityZone) } labels[ec2LabelAZID] = model.LabelValue(azID) diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go new file mode 100644 index 0000000000..f34065c23e --- /dev/null +++ b/discovery/aws/ec2_test.go @@ -0,0 +1,434 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aws + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Helper function to get pointers to literals. +// NOTE: this is common between a few tests. In the future it might be worth moving this out into a separate package. +func strptr(str string) *string { + return &str +} + +func boolptr(b bool) *bool { + return &b +} + +func int64ptr(i int64) *int64 { + return &i +} + +// Struct for test data. +type ec2DataStore struct { + region string + + azToAZID map[string]string + + ownerID string + + instances []*ec2.Instance +} + +// The tests themselves.
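The logging rewrite in the hunks above follows the mechanical pattern applied throughout this diff: a go-kit `level.X(logger).Log("msg", ...)` chain becomes a direct call on a `*slog.Logger`, with `promslog.NewNopLogger()` as the nil fallback. A before/after sketch, using a line from the refresh hunk:

```go
// Before: go-kit/log, message passed under the "msg" key.
level.Debug(d.logger).Log("msg", "Availability zone ID not found", "az", *inst.Placement.AvailabilityZone)

// After: log/slog, message as the first argument, key/value pairs after it.
d.logger.Debug("Availability zone ID not found", "az", *inst.Placement.AvailabilityZone)
```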
+func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestEC2DiscoveryRefreshAZIDs(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + shouldFail bool + ec2Data *ec2DataStore + }{ + { + name: "Normal", + shouldFail: false, + ec2Data: &ec2DataStore{ + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + }, + }, + { + name: "HandleError", + shouldFail: true, + ec2Data: &ec2DataStore{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + } + + err := d.refreshAZIDs(ctx) + if tt.shouldFail { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, client.ec2Data.azToAZID, d.azToAZID) + } + }) + } +} + +func TestEC2DiscoveryRefresh(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + ec2Data *ec2DataStore + expected []*targetgroup.Group + }{ + { + name: "NoPrivateIp", + ec2Data: &ec2DataStore{ + region: "region-noprivateip", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + InstanceId: strptr("instance-id-noprivateip"), + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-noprivateip", + }, + }, + }, + { + name: "NoVpc", + ec2Data: &ec2DataStore{ + region: "region-novpc", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + ownerID: "owner-id-novpc", + instances: []*ec2.Instance{ + { + // set every possible option and test them here + Architecture: strptr("architecture-novpc"), + ImageId: strptr("ami-novpc"), + InstanceId: strptr("instance-id-novpc"), + InstanceLifecycle: strptr("instance-lifecycle-novpc"), + InstanceType: strptr("instance-type-novpc"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + Platform: strptr("platform-novpc"), + PrivateDnsName: strptr("private-dns-novpc"), + PrivateIpAddress: strptr("1.2.3.4"), + PublicDnsName: strptr("public-dns-novpc"), + PublicIpAddress: strptr("42.42.42.2"), + State: &ec2.InstanceState{Name: strptr("running")}, + // test tags once and for all + Tags: []*ec2.Tag{ + {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")}, + {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")}, + nil, + {Value: strptr("tag-4-value")}, + {Key: strptr("tag-5-key")}, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-novpc", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("1.2.3.4:4242"), + "__meta_ec2_ami": model.LabelValue("ami-novpc"), + "__meta_ec2_architecture": model.LabelValue("architecture-novpc"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"), + "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"), + "__meta_ec2_platform": model.LabelValue("platform-novpc"), + "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"), + "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"), + "__meta_ec2_public_dns_name":
model.LabelValue("public-dns-novpc"), + "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"), + "__meta_ec2_region": model.LabelValue("region-novpc"), + "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"), + "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"), + }, + }, + }, + }, + }, + { + name: "Ipv4", + ec2Data: &ec2DataStore{ + region: "region-ipv4", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv4"), + InstanceId: strptr("instance-id-ipv4"), + InstanceType: strptr("instance-type-ipv4"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")}, + PrivateIpAddress: strptr("5.6.7.8"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-3"), + VpcId: strptr("vpc-ipv4"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without subnet -> should be ignored + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:1::1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + }, + // interface with subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + // interface with another subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-1"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv4", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("5.6.7.8:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv4"), + "__meta_ec2_availability_zone": model.LabelValue("azname-c"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"), + "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"), + "__meta_ec2_region": model.LabelValue("region-ipv4"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"), + }, + }, + }, + }, + }, + { + name: "Ipv6", + ec2Data: &ec2DataStore{ + region: "region-ipv6", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv6"), + InstanceId: strptr("instance-id-ipv6"), + InstanceType: strptr("instance-type-ipv6"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + PrivateIpAddress: strptr("9.10.11.12"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-2"), + VpcId: strptr("vpc-ipv6"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without primary IPv6, index 2 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::1:1"), + IsPrimaryIpv6: boolptr(false), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 1 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(1), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::2:1"), + 
IsPrimaryIpv6: boolptr(false), + }, + { + Ipv6Address: strptr("2001:db8:2::2:2"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 3 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::3:1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-1"), + }, + // interface without primary IPv6, index 0 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(0), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv6", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("9.10.11.12:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv6"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"), + "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"), + "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"), + "__meta_ec2_region": model.LabelValue("region-ipv6"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"), + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + cfg: &EC2SDConfig{ + Port: 4242, + Region: client.ec2Data.region, + }, + } + + g, err := d.refresh(ctx) + require.NoError(t, err) + require.Equal(t, tt.expected, g) + }) + } +} + +// EC2 client mock. 
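The mock client defined next leans on the aws-sdk-go v1 paging contract: `DescribeInstancesPagesWithContext` invokes the supplied callback once per page, passing a `lastPage` flag, and stops iterating when the callback returns false. An illustrative consumer-side sketch of that contract (not part of the patch):

```go
// Collect instances across pages; the callback's boolean return value
// tells the SDK whether to fetch the next page.
var instances []*ec2.Instance
err := client.DescribeInstancesPagesWithContext(ctx, &ec2.DescribeInstancesInput{},
	func(page *ec2.DescribeInstancesOutput, lastPage bool) bool {
		for _, r := range page.Reservations {
			instances = append(instances, r.Instances...)
		}
		return true // keep paging; the SDK stops after the last page
	})
```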
+type mockEC2Client struct { + ec2iface.EC2API + ec2Data ec2DataStore +} + +func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client { + client := mockEC2Client{ + ec2Data: *ec2Data, + } + return &client +} + +func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) { + if len(m.ec2Data.azToAZID) == 0 { + return nil, errors.New("No AZs found") + } + + azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID)) + + i := 0 + for k, v := range m.ec2Data.azToAZID { + azs[i] = &ec2.AvailabilityZone{ + ZoneName: strptr(k), + ZoneId: strptr(v), + } + i++ + } + + return &ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: azs, + }, nil +} + +func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error { + r := ec2.Reservation{} + r.SetInstances(m.ec2Data.instances) + r.SetOwnerId(m.ec2Data.ownerID) + + o := ec2.DescribeInstancesOutput{} + o.SetReservations([]*ec2.Reservation{&r}) + + _ = fn(&o, true) + + return nil +} diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 0ad7f2d541..0b046be6d9 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,10 +30,10 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/lightsail" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -130,14 +131,14 @@ type LightsailDiscovery struct { } // NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets. 
-func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { +func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { m, ok := metrics.(*lightsailMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &LightsailDiscovery{ diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 70d95b9f3a..35bbc3847c 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "math/rand" "net" "net/http" @@ -35,10 +36,9 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -175,7 +175,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *SDConfig port int cache *cache.Cache[string, *armnetwork.Interface] @@ -183,14 +183,14 @@ type Discovery struct { } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. -func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*azureMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000))) d := &Discovery{ @@ -228,26 +228,26 @@ type azureClient struct { vm *armcompute.VirtualMachinesClient vmss *armcompute.VirtualMachineScaleSetsClient vmssvm *armcompute.VirtualMachineScaleSetVMsClient - logger log.Logger + logger *slog.Logger } var _ client = &azureClient{} -// createAzureClient is a helper function for creating an Azure compute client to ARM. -func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { - cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment) +// createAzureClient is a helper method for creating an Azure compute client to ARM. 
+func (d *Discovery) createAzureClient() (client, error) { + cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment) if err != nil { return &azureClient{}, err } var c azureClient - c.logger = logger + c.logger = d.logger telemetry := policy.TelemetryOptions{ ApplicationID: userAgent, } - credential, err := newCredential(cfg, policy.ClientOptions{ + credential, err := newCredential(*d.cfg, policy.ClientOptions{ Cloud: cloudConfiguration, Telemetry: telemetry, }) @@ -255,7 +255,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { return &azureClient{}, err } - client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd") + client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd") if err != nil { return &azureClient{}, err } @@ -267,22 +267,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { }, } - c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options) + c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options) + c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options) + c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options) + c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } @@ -337,35 +337,27 @@ type virtualMachine struct { } // Create a new azureResource object from an ID string. -func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) { +func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } resourceID, err := arm.ParseResourceID(id) if err != nil { err := fmt.Errorf("invalid ID '%s': %w", id, err) - level.Error(logger).Log("err", err) + logger.Error("Failed to parse resource ID", "err", err) return &arm.ResourceID{}, err } return resourceID, nil } -func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { - defer level.Debug(d.logger).Log("msg", "Azure discovery completed") - - client, err := createAzureClient(*d.cfg, d.logger) - if err != nil { - d.metrics.failuresCount.Inc() - return nil, fmt.Errorf("could not create Azure client: %w", err) - } - +func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) { machines, err := client.getVMs(ctx, d.cfg.ResourceGroup) if err != nil { d.metrics.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machines: %w", err) } - level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines)) + d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines)) // Load the vms managed by scale sets. 
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup) @@ -418,6 +410,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { return []*targetgroup.Group{&tg}, nil } +func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { + defer d.logger.Debug("Azure discovery completed") + + client, err := d.createAzureClient() + if err != nil { + d.metrics.failuresCount.Inc() + return nil, fmt.Errorf("could not create Azure client: %w", err) + } + + return d.refreshAzureClient(ctx, client) +} + func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) { r, err := newAzureResourceFromID(vm.ID, d.logger) if err != nil { @@ -459,7 +463,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if err != nil { if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + d.logger.Warn("Network interface does not exist", "name", nicID, "err", err) } else { return nil, err } @@ -480,7 +484,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM // yet support this. On deallocated machines, this value happens to be nil so it // is a cheap and easy way to determine if a machine is allocated or not. if networkInterface.Properties.Primary == nil { - level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) + d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name) return nil, nil } @@ -724,7 +728,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { rs := time.Duration(random) * time.Second exptime := time.Duration(d.cfg.RefreshInterval*10) + rs d.cache.Set(nicID, netInt, cache.WithExpiration(exptime)) - level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds()) + d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds()) } // getFromCache will get the network Interface for the specified nicID diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 32dab66c8c..b905e9fcef 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -15,19 +15,34 @@ package azure import ( "context" - "fmt" + "log/slog" + "net/http" + "slices" + "strings" "testing" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" + fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/targetgroup" ) +const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}" + func TestMain(m 
*testing.M) { goleak.VerifyTestMain(m, goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"), @@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) { vmType := "type" location := "westeurope" computerName := "computer_name" - networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1" ipAddress := "10.20.30.40" primary := true networkProfile := armcompute.NetworkProfile{ NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ { - ID: &networkID, + ID: to.Ptr(defaultMockNetworkID), Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary}, }, }, @@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) { Location: location, OsType: "Linux", Tags: map[string]*string{}, - NetworkInterfaces: []string{networkID}, + NetworkInterfaces: []string{defaultMockNetworkID}, Size: size, } @@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) { cfg := DefaultSDConfig d := &Discovery{ cfg: &cfg, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))), } network := armnetwork.Interface{ - Name: &networkID, + Name: to.Ptr(defaultMockNetworkID), + ID: to.Ptr(defaultMockNetworkID), Properties: &armnetwork.InterfacePropertiesFormat{ Primary: &primary, IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ @@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) { }, }, } - client := &mockAzureClient{ - networkInterface: &network, - } + + client := createMockAzureClient(t, nil, nil, nil, network, nil) + labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM) require.NoError(t, err) require.Len(t, labelSet, 11) @@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) { } } +func TestAzureRefresh(t *testing.T) { + tests := []struct { + scenario string + vmResp []armcompute.VirtualMachinesClientListAllResponse + vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse + vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse + interfacesResp armnetwork.Interface + expectedTG []*targetgroup.Group + }{ + { + scenario: "VMs, VMSS and VMSSVMs in Multiple Responses", + vmResp: []armcompute.VirtualMachinesClientListAllResponse{ + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")), + }, + }, + }, + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")), + }, + }, + }, + }, + vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{ + { + VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{ + Value: []*armcompute.VirtualMachineScaleSet{ + { + ID: 
to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"), + Name: to.Ptr("vmScaleSet1"), + Location: to.Ptr("australiaeast"), + Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"), + }, + }, + }, + }, + }, + vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{ + { + VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{ + Value: []*armcompute.VirtualMachineScaleSetVM{ + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")), + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")), + }, + }, + }, + }, + interfacesResp: armnetwork.Interface{ + ID: to.Ptr(defaultMockNetworkID), + Properties: &armnetwork.InterfacePropertiesFormat{ + Primary: to.Ptr(true), + IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ + {Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: to.Ptr("10.0.0.1"), + }}, + }, + }, + }, + expectedTG: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm3", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": 
"/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm4", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.scenario, func(t *testing.T) { + t.Parallel() + azureSDConfig := &DefaultSDConfig + + azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil) + + reg := prometheus.NewRegistry() + refreshMetrics := discovery.NewRefreshMetrics(reg) + metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics) + + sd, err := NewDiscovery(azureSDConfig, nil, metrics) + require.NoError(t, err) + + tg, err := sd.refreshAzureClient(context.Background(), azureClient) + require.NoError(t, err) + + sortTargetsByID(tg[0].Targets) + require.Equal(t, tc.expectedTG, tg) + }) + } +} + type mockAzureClient struct { - networkInterface *armnetwork.Interface + azureClient } -var _ client = &mockAzureClient{} +func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client { + t.Helper() + mockVMServer := defaultMockVMServer(vmResp) + mockVMSSServer := defaultMockVMSSServer(vmssResp) + mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp) + mockInterfaceServer := defaultMockInterfaceServer(interfaceResp) -func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) { - return nil, nil -} + vmClient, err 
:= armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer), + }, + }) + require.NoError(t, err) -func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) { - return nil, nil -} + vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer), + }, + }) + require.NoError(t, err) -func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) { - return nil, nil -} + vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer), + }, + }) + require.NoError(t, err) -func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { - if networkInterfaceID == "" { - return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty") + interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer), + }, + }) + require.NoError(t, err) + + return &mockAzureClient{ + azureClient: azureClient{ + vm: vmClient, + vmss: vmssClient, + vmssvm: vmssvmClient, + nic: interfacesClient, + logger: logger, + }, } - return m.networkInterface, nil } -func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) { - if scaleSetName == "" { - return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty") +func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer { + return fakenetwork.InterfacesServer{ + Get: func(ctx context.Context, resourceGroupName, networkInterfaceName string, options *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil) + return + }, + GetVirtualMachineScaleSetNetworkInterface: func(ctx context.Context, resourceGroupName, virtualMachineScaleSetName, virtualmachineIndex, networkInterfaceName string, options *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil) + return + }, } - return m.networkInterface, nil +} + +func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer { + return fake.VirtualMachinesServer{ + NewListAllPager: func(options *armcompute.VirtualMachinesClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]) { + for _, page := range 
vmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer { + return fake.VirtualMachineScaleSetsServer{ + NewListAllPager: func(options *armcompute.VirtualMachineScaleSetsClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]) { + for _, page := range vmssResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultMockVMSSVMServer(vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer { + return fake.VirtualMachineScaleSetVMsServer{ + NewListPager: func(resourceGroupName, virtualMachineScaleSetName string, options *armcompute.VirtualMachineScaleSetVMsClientListOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]) { + for _, page := range vmssvmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachine{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + { + ID: to.Ptr(defaultMockNetworkID), + }, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, + } +} + +func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachineScaleSetVM{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + InstanceID: to.Ptr("123"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineScaleSetVMProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + {ID: to.Ptr(defaultMockNetworkID)}, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, + } +} + +func sortTargetsByID(targets []model.LabelSet) { + slices.SortFunc(targets, func(a, b model.LabelSet) int { + return 
strings.Compare(string(a["__meta_azure_machine_id"]), string(b["__meta_azure_machine_id"])) + }) } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index bdc1fc8dce..fcae7b186f 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" consul "github.com/hashicorp/consul/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -113,8 +113,11 @@ type SDConfig struct { Services []string `yaml:"services,omitempty"` // A list of tags used to filter instances inside a service. Services must contain all tags in the list. ServiceTags []string `yaml:"tags,omitempty"` - // Desired node metadata. + // Desired node metadata. As of Consul 1.14, consider `filter` instead. NodeMeta map[string]string `yaml:"node_meta,omitempty"` + // Consul filter string + // See https://www.consul.io/api-docs/catalog#filtering-1, for syntax + Filter string `yaml:"filter,omitempty"` HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` } @@ -174,22 +177,23 @@ type Discovery struct { watchedServices []string // Set of services which will be discovered. watchedTags []string // Tags used to filter instances of a service. watchedNodeMeta map[string]string + watchedFilter string allowStale bool refreshInterval time.Duration finalizer func() - logger log.Logger + logger *slog.Logger metrics *consulMetrics } // NewDiscovery returns a new Discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*consulMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout)) @@ -218,6 +222,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere watchedServices: conf.Services, watchedTags: conf.ServiceTags, watchedNodeMeta: conf.NodeMeta, + watchedFilter: conf.Filter, allowStale: conf.AllowStale, refreshInterval: time.Duration(conf.RefreshInterval), clientDatacenter: conf.Datacenter, @@ -282,7 +287,7 @@ func (d *Discovery) getDatacenter() error { info, err := d.client.Agent().Self() if err != nil { - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) d.metrics.rpcFailuresCount.Inc() return err } @@ -290,12 +295,12 @@ func (d *Discovery) getDatacenter() error { dc, ok := info["Config"]["Datacenter"].(string) if !ok { err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"]) - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) return err } d.clientDatacenter = dc - d.logger = log.With(d.logger, "datacenter", dc) + d.logger = d.logger.With("datacenter", dc) return nil } @@ -361,13 +366,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- 
[]*targetgroup.Group) { // entire list of services. func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) { catalog := d.client.Catalog() - level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ",")) + d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: d.allowStale, NodeMeta: d.watchedNodeMeta, + Filter: d.watchedFilter, } t0 := time.Now() srvs, meta, err := catalog.Services(opts.WithContext(ctx)) @@ -382,7 +388,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. } if err != nil { - level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) + d.logger.Error("Error refreshing service list", "err", err) d.metrics.rpcFailuresCount.Inc() time.Sleep(retryInterval) return @@ -445,7 +451,7 @@ type consulService struct { discovery *Discovery client *consul.Client tagSeparator string - logger log.Logger + logger *slog.Logger rpcFailuresCount prometheus.Counter serviceRPCDuration prometheus.Observer } @@ -490,7 +496,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G // Get updates for a service. func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) { - level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) + srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, @@ -513,7 +519,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr } if err != nil { - level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) + srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) srv.rpcFailuresCount.Inc() time.Sleep(retryInterval) return diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go index e3bc7938f5..cdbb80baba 100644 --- a/discovery/consul/consul_test.go +++ b/discovery/consul/consul_test.go @@ -21,10 +21,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "gopkg.in/yaml.v2" @@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { case "/v1/catalog/services?index=1&wait=120000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer + case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms": + response = ServicesTestAnswer default: t.Errorf("Unhandled consul call: %s", r.URL) } @@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { } func newDiscovery(t *testing.T, config *SDConfig) *Discovery { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() metrics := NewTestMetrics(t, config, prometheus.NewRegistry()) @@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) { <-ch } +// Watch the test service with a specific tag and node-meta via Filter parameter. 
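// For illustration, the equivalent scrape configuration would set the new
// `filter` field on a consul_sd_configs entry. A minimal sketch (the server
// address is hypothetical; the filter expression is the one exercised by the
// test below):
//
//	consul_sd_configs:
//	  - server: "localhost:8500"
//	    filter: 'NodeMeta.rack_name == "2304"'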
+func TestFilterOption(t *testing.T) { + stub, config := newServer(t) + defer stub.Close() + + config.Services = []string{"test"} + config.Filter = `NodeMeta.rack_name == "2304"` + config.Token = "fake-token" + + d := newDiscovery(t, config) + + ctx, cancel := context.WithCancel(context.Background()) + ch := make(chan []*targetgroup.Group) + go func() { + d.Run(ctx, ch) + close(ch) + }() + checkOneTarget(t, <-ch) + cancel() +} + func TestGetDatacenterShouldReturnError(t *testing.T) { for _, tc := range []struct { handler func(http.ResponseWriter, *http.Request) @@ -407,7 +430,7 @@ func TestGetDatacenterShouldReturnError(t *testing.T) { err = d.getDatacenter() // An error should be returned. - require.Equal(t, tc.errMessage, err.Error()) + require.EqualError(t, err, tc.errMessage) // Should still be empty. require.Equal(t, "", d.clientDatacenter) } diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index ecee60cb1f..52f3a9c57a 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -16,6 +16,7 @@ package digitalocean import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" @@ -23,7 +24,6 @@ import ( "time" "github.com/digitalocean/godo" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -111,7 +111,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*digitaloceanMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go index 841b5ef977..a282225ac2 100644 --- a/discovery/digitalocean/digitalocean_test.go +++ b/discovery/digitalocean/digitalocean_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/discovery.go b/discovery/discovery.go index a91faf6c86..c400de3632 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -15,9 +15,9 @@ package discovery import ( "context" + "log/slog" "reflect" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -47,7 +47,7 @@ type DiscovererMetrics interface { // DiscovererOptions provides options for a Discoverer. type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger Metrics DiscovererMetrics @@ -109,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) { // UnmarshalYAML implements yaml.Unmarshaler. 
func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() @@ -124,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { // MarshalYAML implements yaml.Marshaler. func (c Configs) MarshalYAML() (interface{}, error) { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() diff --git a/util/testutil/logging.go b/discovery/discovery_test.go similarity index 57% rename from util/testutil/logging.go rename to discovery/discovery_test.go index db096ea234..af327195f2 100644 --- a/util/testutil/logging.go +++ b/discovery/discovery_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Prometheus Authors +// Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -11,25 +11,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -package testutil +package discovery import ( "testing" - "github.com/go-kit/log" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" ) -type logger struct { - t *testing.T -} +func TestConfigsCustomUnMarshalMarshal(t *testing.T) { + input := `static_configs: +- targets: + - foo:1234 + - bar:4321 +` + cfg := &Configs{} + err := yaml.UnmarshalStrict([]byte(input), cfg) + require.NoError(t, err) -// NewLogger returns a gokit compatible Logger which calls t.Log. -func NewLogger(t *testing.T) log.Logger { - return logger{t: t} -} - -// Log implements log.Logger. -func (t logger) Log(keyvals ...interface{}) error { - t.t.Log(keyvals...) - return nil + output, err := yaml.Marshal(cfg) + require.NoError(t, err) + require.Equal(t, input, string(output)) } diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 314c3d38cd..5de7f64886 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -111,21 +111,21 @@ type Discovery struct { names []string port int qtype uint16 - logger log.Logger + logger *slog.Logger metrics *dnsMetrics - lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookupFn func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dnsMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } qtype := dns.TypeSRV @@ -174,7 +174,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { for _, name := range d.names { go func(n string) { if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) { - level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) + d.logger.Error("Error refreshing DNS targets", "err", err) } wg.Done() }(name) @@ -238,7 +238,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // CNAME responses can occur with "Type: A" dns_sd_config requests. continue default: - level.Warn(d.logger).Log("msg", "Invalid record", "record", record) + d.logger.Warn("Invalid record", "record", record) continue } tg.Targets = append(tg.Targets, model.LabelSet{ @@ -288,7 +288,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // error will be generic-looking, because trying to return all the errors // returned by the combination of all name permutations and servers is a // nightmare. -func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { +func lookupWithSearchPath(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { conf, err := dns.ClientConfigFromFile(resolvConf) if err != nil { return nil, fmt.Errorf("could not load resolv.conf: %w", err) @@ -337,14 +337,14 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms // A non-viable answer is "anything else", which encompasses both various // system-level problems (like network timeouts) and also // valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc). 
-func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) { +func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger *slog.Logger) (*dns.Msg, error) { client := &dns.Client{} for _, server := range conf.Servers { servAddr := net.JoinHostPort(server, conf.Port) msg, err := askServerForName(name, qtype, client, servAddr, true) if err != nil { - level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err) + logger.Warn("DNS resolution failed", "server", server, "name", name, "err", err) continue } diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go index 33a976827d..96bb32491f 100644 --- a/discovery/dns/dns_test.go +++ b/discovery/dns/dns_test.go @@ -16,11 +16,11 @@ package dns import ( "context" "fmt" + "log/slog" "net" "testing" "time" - "github.com/go-kit/log" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -40,7 +40,7 @@ func TestDNS(t *testing.T) { testCases := []struct { name string config SDConfig - lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookup func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) expected []*targetgroup.Group }{ @@ -52,7 +52,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return nil, fmt.Errorf("some error") }, expected: []*targetgroup.Group{}, @@ -65,7 +65,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.A{A: net.IPv4(192, 0, 2, 2)}, @@ -97,7 +97,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "AAAA", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.AAAA{AAAA: net.IPv6loopback}, @@ -128,7 +128,7 @@ func TestDNS(t *testing.T) { Type: "SRV", RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -167,7 +167,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -198,7 +198,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{}, nil }, expected: []*targetgroup.Group{ @@ -215,7 +215,7 @@ func TestDNS(t *testing.T) { Port: 25, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: 
func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.MX{Preference: 0, Mx: "smtp1.example.com."}, diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go index 779c081aee..5087346486 100644 --- a/discovery/eureka/eureka.go +++ b/discovery/eureka/eureka.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "net/http" "net/url" "strconv" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -126,7 +126,7 @@ type Discovery struct { } // NewDiscovery creates a new Eureka discovery for the given role. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*eurekaMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/file/file.go b/discovery/file/file.go index e7e9d0870f..1c36b254cc 100644 --- a/discovery/file/file.go +++ b/discovery/file/file.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,12 +27,11 @@ import ( "time" "github.com/fsnotify/fsnotify" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" @@ -175,20 +175,20 @@ type Discovery struct { // and how many target groups they contained. // This is used to detect deleted target groups. lastRefresh map[string]int - logger log.Logger + logger *slog.Logger metrics *fileMetrics } // NewDiscovery returns a new file discovery for the given paths. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { fm, ok := metrics.(*fileMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } disc := &Discovery{ @@ -210,7 +210,7 @@ func (d *Discovery) listFiles() []string { for _, p := range d.paths { files, err := filepath.Glob(p) if err != nil { - level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err) + d.logger.Error("Error expanding glob", "glob", p, "err", err) continue } paths = append(paths, files...) 
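The go-kit/log to log/slog migration applied throughout these discovery packages is mechanical: `level.Error(logger).Log("msg", ...)` becomes `logger.Error(...)` with the message as the first positional argument, and `log.With(logger, k, v)` becomes `logger.With(k, v)`. A minimal, self-contained sketch of the new style (the handler setup and the values are illustrative, not taken from this diff):

package main

import (
	"log/slog"
	"os"
)

func main() {
	// Construct a *slog.Logger; the discovery code receives one from the caller
	// (or falls back to promslog.NewNopLogger when nil).
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

	// Old: level.Error(logger).Log("msg", "Error expanding glob", "glob", p, "err", err)
	// New: the message is positional, key/value pairs follow.
	logger.Error("Error expanding glob", "glob", "/etc/sd/*.json", "err", os.ErrNotExist)

	// Old: logger = log.With(logger, "datacenter", dc)
	// New: With returns a logger that attaches the pair to every record.
	logger = logger.With("datacenter", "dc1")
	logger.Info("Watching services", "tags", "prod")
}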
@@ -231,7 +231,7 @@ func (d *Discovery) watchFiles() { p = "./" } if err := d.watcher.Add(p); err != nil { - level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err) + d.logger.Error("Error adding file watch", "path", p, "err", err) } } } @@ -240,7 +240,7 @@ func (d *Discovery) watchFiles() { func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { watcher, err := fsnotify.NewWatcher() if err != nil { - level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err) + d.logger.Error("Error adding file watcher", "err", err) d.metrics.fileWatcherErrorsCount.Inc() return } @@ -280,7 +280,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { case err := <-d.watcher.Errors: if err != nil { - level.Error(d.logger).Log("msg", "Error watching file", "err", err) + d.logger.Error("Error watching file", "err", err) } } } @@ -300,7 +300,7 @@ func (d *Discovery) deleteTimestamp(filename string) { // stop shuts down the file watcher. func (d *Discovery) stop() { - level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) + d.logger.Debug("Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) done := make(chan struct{}) defer close(done) @@ -320,10 +320,10 @@ func (d *Discovery) stop() { } }() if err := d.watcher.Close(); err != nil { - level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) + d.logger.Error("Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) } - level.Debug(d.logger).Log("msg", "File discovery stopped") + d.logger.Debug("File discovery stopped") } // refresh reads all files matching the discovery's patterns and sends the respective @@ -339,7 +339,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) if err != nil { d.metrics.fileSDReadErrorsCount.Inc() - level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err) + d.logger.Error("Error reading file", "path", p, "err", err) // Prevent deletion down below. ref[p] = d.lastRefresh[p] continue @@ -356,7 +356,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) for f, n := range d.lastRefresh { m, ok := ref[f] if !ok || n > m { - level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f) + d.logger.Debug("file_sd refresh found file that should be removed", "file", f) d.deleteTimestamp(f) for i := m; i < n; i++ { select { diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go index 15f32dd247..a509a144e1 100644 --- a/discovery/gce/gce.go +++ b/discovery/gce/gce.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "golang.org/x/oauth2/google" @@ -129,7 +129,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*gceMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index df56f94c5f..ba64250c0f 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,12 +15,12 @@ package hetzner import ( "context" + "log/slog" "net" "net/http" "strconv" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -58,7 +58,7 @@ type hcloudDiscovery struct { } // newHcloudDiscovery returns a new hcloudDiscovery which periodically refreshes its targets. -func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error) { +func newHcloudDiscovery(conf *SDConfig, _ *slog.Logger) (*hcloudDiscovery, error) { d := &hcloudDiscovery{ port: conf.Port, } diff --git a/discovery/hetzner/hcloud_test.go b/discovery/hetzner/hcloud_test.go index 10b799037a..fa8291625a 100644 --- a/discovery/hetzner/hcloud_test.go +++ b/discovery/hetzner/hcloud_test.go @@ -18,8 +18,8 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ func TestHCloudSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BearerToken = hcloudTestToken cfg.hcloudEndpoint = suite.Mock.Endpoint() - d, err := newHcloudDiscovery(&cfg, log.NewNopLogger()) + d, err := newHcloudDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go index 69c823d382..980c197d77 100644 --- a/discovery/hetzner/hetzner.go +++ b/discovery/hetzner/hetzner.go @@ -17,9 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -135,7 +135,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*hetznerMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -157,7 +157,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { switch conf.Role { case HetznerRoleHcloud: if conf.hcloudEndpoint == "" { diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index 516470b05a..958f8f710f 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -18,13 +18,13 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -51,7 +51,7 @@ type robotDiscovery struct { } // newRobotDiscovery returns a new robotDiscovery which periodically refreshes its targets. -func newRobotDiscovery(conf *SDConfig, _ log.Logger) (*robotDiscovery, error) { +func newRobotDiscovery(conf *SDConfig, _ *slog.Logger) (*robotDiscovery, error) { d := &robotDiscovery{ port: conf.Port, endpoint: conf.robotEndpoint, diff --git a/discovery/hetzner/robot_test.go b/discovery/hetzner/robot_test.go index abee5fea90..2618bd097c 100644 --- a/discovery/hetzner/robot_test.go +++ b/discovery/hetzner/robot_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -42,7 +42,7 @@ func TestRobotSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BasicAuth = &config.BasicAuth{Username: robotTestUsername, Password: robotTestPassword} cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) @@ -91,12 +91,11 @@ func TestRobotSDRefreshHandleError(t *testing.T) { cfg := DefaultSDConfig cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) - require.Error(t, err) - require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error()) + require.EqualError(t, err, "non 2xx status '401' response during hetzner service discovery with role robot") require.Empty(t, targetGroups) } diff --git a/discovery/http/http.go b/discovery/http/http.go index ff76fd7627..65404694c4 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -19,17 +19,18 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/discovery" @@ -40,8 +41,8 @@ import ( var ( // DefaultSDConfig is the default HTTP SD 
configuration. DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(60 * time.Second), HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(60 * time.Second), } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) @@ -114,14 +115,14 @@ type Discovery struct { } // NewDiscovery returns a new HTTP discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*httpMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http", clientOpts...) diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go index 0cafe035dc..9d3a3fb5e7 100644 --- a/discovery/http/http_test.go +++ b/discovery/http/http_test.go @@ -21,11 +21,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -49,7 +49,7 @@ func TestHTTPValidRefresh(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -94,7 +94,7 @@ func TestHTTPInvalidCode(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -123,7 +123,7 @@ func TestHTTPInvalidFormat(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -442,7 +442,7 @@ func TestSourceDisappeared(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) for _, test := range cases { ctx := context.Background() diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index c8b4f7f8e5..1aa21667e3 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -16,9 +16,9 @@ package ionos import ( "errors" "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -43,7 +43,7 @@ func init() { type Discovery struct{} // NewDiscovery returns a new refresh.Discovery for IONOS Cloud. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ionosMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/ionos/server.go b/discovery/ionos/server.go index a850fbbfb4..18e89b1d43 100644 --- a/discovery/ionos/server.go +++ b/discovery/ionos/server.go @@ -16,13 +16,13 @@ package ionos import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" ionoscloud "github.com/ionos-cloud/sdk-go/v6" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -60,7 +60,7 @@ type serverDiscovery struct { datacenterID string } -func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error) { +func newServerDiscovery(conf *SDConfig, _ *slog.Logger) (*serverDiscovery, error) { d := &serverDiscovery{ port: conf.Port, datacenterID: conf.DatacenterID, diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index c7a60ae6d3..14d3bc7a99 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,7 +33,7 @@ import ( // Endpoints discovers new endpoint targets. type Endpoints struct { - logger log.Logger + logger *slog.Logger endpointsInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -49,9 +49,9 @@ type Endpoints struct { } // NewEndpoints returns a new endpoints discovery. 
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { +func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd) @@ -92,26 +92,23 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoints event handler.", "err", err) + l.Error("Error adding endpoints event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } - ep := &apiv1.Endpoints{} - ep.Namespace = svc.Namespace - ep.Name = svc.Name - obj, exists, err := e.endpointsStore.Get(ep) + obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name)) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } if err != nil { - level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err) + e.logger.Error("retrieving endpoints failed", "err", err) } } _, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -131,7 +128,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } _, err = e.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: func(old, cur interface{}) { @@ -154,7 +151,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if e.withNodeMetadata { _, err = e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -167,12 +164,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -182,7 +182,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca func (e *Endpoints) enqueueNode(nodeName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -194,7 +194,7 @@ func (e *Endpoints) enqueueNode(nodeName string) { func (e *Endpoints) enqueuePod(podNamespacedName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(podIndex, podNamespacedName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for pod", "pod", podNamespacedName, "err", err) + e.logger.Error("Error getting endpoints for pod", "pod", 
podNamespacedName, "err", err) return } @@ -223,7 +223,7 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") + e.logger.Error("endpoints informer unable to sync cache") } return } @@ -247,13 +247,13 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointsStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -262,7 +262,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) } eps, err := convertToEndpoints(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err) + e.logger.Error("converting to Endpoints object failed", "err", err) return true } send(ctx, ch, e.buildEndpoints(eps)) @@ -361,16 +361,19 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. - for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.Port == cport.ContainerPort { ports := strconv.FormatUint(uint64(port.Port), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(port.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -397,10 +400,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { v := eps.Labels[apiv1.EndpointsOverCapacity] if v == "truncated" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } if v == "warning" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } // For all seen pods, check all container ports. If they were not covered @@ -411,7 +414,8 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) 
+ for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -428,6 +432,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -435,6 +440,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -448,13 +454,10 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -464,31 +467,27 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { - svc := &apiv1.Service{} - svc.Namespace = ns - svc.Name = name - - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } -func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet { +func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger *slog.Logger, nodeName *string) model.LabelSet { if nodeName == nil { return tg } obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName) if err != nil { - level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err) + logger.Error("Error getting node", "node", *nodeName, "err", err) return tg } diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 3ea98c5db9..a1ac6e5d48 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -18,10 +18,12 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -244,6 +246,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -259,6 +262,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { 
"__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -821,6 +825,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1078,6 +1083,7 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1089,3 +1095,186 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { }, }.Run(t) } + +func TestEndpointsDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + Subsets: []v1.EndpointSubset{ + { + Addresses: []v1.EndpointAddress{ + { + IP: "4.3.2.1", + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + Ports: []v1.EndpointPort{ + { + Name: "testport", + Port: 9000, + Protocol: v1.ProtocolTCP, + }, + { + Name: "initport", + Port: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: v1.PodSpec{ + NodeName: "testnode", + InitContainers: []v1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + Containers: []v1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []v1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: v1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpoints/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "initport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpoints_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpoints/default/testsidecar", + }, + }, + }.Run(t) +} + +func BenchmarkResolvePodRef(b *testing.B) { + indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil) + e := &Endpoints{ + podStore: indexer, + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + p := e.resolvePodRef(&v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "foo", + }) + require.Nil(b, p) + } +} diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index 2ac65ef414..45bc43eff9 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -17,13 +17,13 @@ 
import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" "k8s.io/client-go/tools/cache" @@ -35,7 +35,7 @@ import ( // EndpointSlice discovers new endpoint targets. type EndpointSlice struct { - logger log.Logger + logger *slog.Logger endpointSliceInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -51,9 +51,9 @@ type EndpointSlice struct { } // NewEndpointSlice returns a new endpointslice discovery. -func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { +func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd) @@ -92,13 +92,13 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoint slices event handler.", "err", err) + l.Error("Error adding endpoint slices event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } @@ -108,7 +108,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod for _, obj := range e.endpointSliceStore.List() { esa, err := e.getEndpointSliceAdaptor(obj) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) continue } if lv, exists := esa.labels()[esa.labelServiceName()]; exists && lv == svc.Name { @@ -131,7 +131,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } if e.withNodeMetadata { @@ -145,12 +145,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -160,7 +163,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod func (e *EndpointSlice) enqueueNode(nodeName string) { endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -188,7 +191,7 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group) } if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) 
{ if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache") + e.logger.Error("endpointslice informer unable to sync cache") } return } @@ -212,13 +215,13 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointSliceStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -228,7 +231,7 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr esa, err := e.getEndpointSliceAdaptor(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) return true } @@ -377,19 +380,23 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. - for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.port() == nil { continue } + if *port.port() == cport.ContainerPort { ports := strconv.FormatUint(uint64(*port.port()), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(cport.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -417,7 +424,8 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) 
+ for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -437,6 +445,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -444,6 +453,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -457,13 +467,10 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -474,27 +481,27 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) { var ( - svc = &apiv1.Service{} found bool + name string ) - svc.Namespace = esa.namespace() + ns := esa.namespace() // Every EndpointSlice object has the Service they belong to in the // kubernetes.io/service-name label. 
- svc.Name, found = esa.labels()[esa.labelServiceName()] + name, found = esa.labels()[esa.labelServiceName()] if !found { return } - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } diff --git a/discovery/kubernetes/endpointslice_test.go b/discovery/kubernetes/endpointslice_test.go index c7e99b0a00..cc92c7ddaa 100644 --- a/discovery/kubernetes/endpointslice_test.go +++ b/discovery/kubernetes/endpointslice_test.go @@ -291,6 +291,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -306,6 +307,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -986,6 +988,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1199,3 +1202,165 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) { }) } } + +func TestEndpointSliceDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + AddressType: v1.AddressTypeIPv4, + Ports: []v1.EndpointPort{ + { + Name: strptr("testport"), + Port: int32ptr(9000), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + { + Name: strptr("initport"), + Port: int32ptr(9111), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + }, + Endpoints: []v1.Endpoint{ + { + Addresses: []string{"4.3.2.1"}, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: corev1.PodSpec{ + NodeName: "testnode", + InitContainers: []corev1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{}, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpointslice/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9111", + "__meta_kubernetes_endpointslice_port_name": "initport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpointslice/default/testsidecar", + }, + }, + }.Run(t) +} diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 4d91e7a460..1b7847c5c4 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -17,10 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" @@ -32,14 +31,14 @@ import ( // Ingress 
implements discovery of Kubernetes ingress. type Ingress struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewIngress returns a new ingress discovery. -func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { +func NewIngress(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd) ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate) ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete) @@ -66,7 +65,7 @@ func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding ingresses event handler.", "err", err) + l.Error("Error adding ingresses event handler.", "err", err) } return s } @@ -86,7 +85,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") + i.logger.Error("ingress informer unable to sync cache") } return } @@ -127,7 +126,7 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b case *v1.Ingress: ia = newIngressAdaptorFromV1(ingress) default: - level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err", + i.logger.Error("converting to Ingress object failed", "err", fmt.Errorf("received unexpected object: %v", o)) return true } diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index 93ac65d8dc..64e8886cfd 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "os" "reflect" "strings" @@ -25,11 +26,10 @@ import ( "github.com/prometheus/prometheus/util/strutil" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" apiv1 "k8s.io/api/core/v1" disv1 "k8s.io/api/discovery/v1" @@ -260,7 +260,7 @@ type Discovery struct { sync.RWMutex client kubernetes.Interface role Role - logger log.Logger + logger *slog.Logger namespaceDiscovery *NamespaceDiscovery discoverers []discovery.Discoverer selectors roleSelector @@ -285,14 +285,14 @@ func (d *Discovery) getNamespaces() []string { } // New creates a new Kubernetes discovery for the given role. 
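Stepping back to the endpoints and endpointslice hunks above for a moment: both builders classify a container port as belonging to an init container purely by position. The regular and init container slices are concatenated, and any index at or past len(pod.Spec.Containers) is an init container, which is what sets __meta_kubernetes_pod_container_init to "true" for sidecar ports in the test fixtures. A minimal standalone sketch of that classification (the container struct here is a stand-in, not the real apiv1.Container):

```go
package main

import "fmt"

// container stands in for apiv1.Container; only the field the sketch needs.
type container struct{ name string }

func main() {
	regular := []container{{"c1"}}
	inits := []container{{"ic1"}, {"ic2"}}

	// Same idea as buildEndpoints/buildEndpointSlice: walk one combined
	// slice and classify by index relative to the regular-container count.
	containers := append(append([]container{}, regular...), inits...)
	for i, c := range containers {
		isInit := i >= len(regular)
		fmt.Printf("%s __meta_kubernetes_pod_container_init=%t\n", c.name, isInit)
	}
}
```

The copy into a fresh slice is only to keep the sketch side-effect free; the diff appends directly to pod.Spec.Containers. The net effect is that ports declared on init containers, including restartable sidecars, become discoverable targets, with the new label available for relabeling.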
-func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { +func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { m, ok := metrics.(*kubernetesMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } var ( kcfg *rest.Config @@ -324,7 +324,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di ownNamespace = string(ownNamespaceContents) } - level.Info(l).Log("msg", "Using pod service account via in-cluster config") + l.Info("Using pod service account via in-cluster config") default: rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd") if err != nil { @@ -446,7 +446,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { go nodeInf.Run(ctx.Done()) } eps := NewEndpointSlice( - log.With(d.logger, "role", "endpointslice"), + d.logger.With("role", "endpointslice"), informer, d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -506,7 +506,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } eps := NewEndpoints( - log.With(d.logger, "role", "endpoint"), + d.logger.With("role", "endpoint"), d.newEndpointsByNodeInformer(elw), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -540,7 +540,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } pod := NewPod( - log.With(d.logger, "role", "pod"), + d.logger.With("role", "pod"), d.newPodsByNodeInformer(plw), nodeInformer, d.metrics.eventCount, @@ -564,7 +564,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } svc := NewService( - log.With(d.logger, "role", "service"), + d.logger.With("role", "service"), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.metrics.eventCount, ) @@ -589,7 +589,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled) ingress := NewIngress( - log.With(d.logger, "role", "ingress"), + d.logger.With("role", "ingress"), informer, d.metrics.eventCount, ) @@ -598,11 +598,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } case RoleNode: nodeInformer := d.newNodeInformer(ctx) - node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.metrics.eventCount) + node := NewNode(d.logger.With("role", "node"), nodeInformer, d.metrics.eventCount) d.discoverers = append(d.discoverers, node) go node.informer.Run(ctx.Done()) default: - level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role) + d.logger.Error("unknown Kubernetes discovery kind", "role", d.role) } var wg sync.WaitGroup @@ -804,3 +804,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, func namespacedName(namespace, name string) string { return namespace + "/" + name } + +// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone. +// It assumes the MetaNamespaceKeyFunc keyFunc is used, which uses the node name as the tombstone key. 
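The nodeName helper introduced just below exists because an informer's DeleteFunc is not guaranteed a live object: when the watch misses the final delete event, client-go delivers a cache.DeletedFinalStateUnknown tombstone, and the previous o.(*apiv1.Node) type assertion would panic on it. A small sketch of both cases, mirroring the TestNodeName test added further down:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func main() {
	// A live Node yields its store key, which for a cluster-scoped object
	// is simply the name.
	node := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
	key, _ := cache.DeletionHandlingMetaNamespaceKeyFunc(node)
	fmt.Println(key) // foo

	// A tombstone yields the key it carries instead of failing, so the
	// DeleteFunc handlers above can still enqueue the right node.
	tomb := cache.DeletedFinalStateUnknown{Key: "bar"}
	key, _ = cache.DeletionHandlingMetaNamespaceKeyFunc(tomb)
	fmt.Println(key) // bar
}
```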
+func nodeName(o interface{}) (string, error) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o) + if err != nil { + return "", err + } + return key, nil +} diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go index a026366502..a14f2b3d1b 100644 --- a/discovery/kubernetes/kubernetes_test.go +++ b/discovery/kubernetes/kubernetes_test.go @@ -20,10 +20,12 @@ import ( "testing" "time" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/version" "k8s.io/apimachinery/pkg/watch" @@ -71,7 +73,7 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer d := &Discovery{ client: clientset, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), role: role, namespaceDiscovery: &nsDiscovery, ownNamespace: "own-ns", @@ -320,3 +322,18 @@ func TestFailuresCountMetric(t *testing.T) { }) } } + +func TestNodeName(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + }, + } + name, err := nodeName(node) + require.NoError(t, err) + require.Equal(t, "foo", name) + + name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"}) + require.NoError(t, err) + require.Equal(t, "bar", name) +} diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index 74d87e22c4..0e0c5745f2 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -38,16 +38,16 @@ const ( // Node discovers Kubernetes nodes. type Node struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewNode returns a new node discovery. 
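NewNode below follows the same mechanical go-kit-to-slog translation applied across every file in this change: level.X(l).Log("msg", m, kvs...) becomes l.X(m, kvs...), log.With(l, kvs...) becomes the *slog.Logger With method, and the nil-logger guard switches to promslog. A hedged sketch of the before/after shapes:

```go
package main

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// ensureLogger mirrors the nil guard the constructors keep, now via promslog.
func ensureLogger(l *slog.Logger) *slog.Logger {
	if l == nil {
		return promslog.NewNopLogger()
	}
	return l
}

func main() {
	l := ensureLogger(nil)

	// Before: level.Error(l).Log("msg", "converting to Node object failed", "err", err)
	// After:  the message is positional, key/value pairs follow.
	l.Error("converting to Node object failed", "err", "example error")

	// Before: log.With(d.logger, "role", "node")
	// After:  the method form, returning a derived *slog.Logger.
	l.With("role", "node").Info("contextual fields attach the same way")
}
```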
-func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { +func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd) @@ -76,13 +76,13 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } return n } func (n *Node) enqueue(obj interface{}) { - key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + key, err := nodeName(obj) if err != nil { return } @@ -96,7 +96,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(n.logger).Log("msg", "node informer unable to sync cache") + n.logger.Error("node informer unable to sync cache") } return } @@ -133,7 +133,7 @@ func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } node, err := convertToNode(o) if err != nil { - level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err) + n.logger.Error("converting to Node object failed", "err", err) return true } send(ctx, ch, n.buildNode(node)) @@ -181,7 +181,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group { addr, addrMap, err := nodeAddress(node) if err != nil { - level.Warn(n.logger).Log("msg", "No node address found", "err", err) + n.logger.Warn("No node address found", "err", err) return nil } addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10)) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 02990e415f..8704a66239 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -17,14 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" @@ -44,14 +44,14 @@ type Pod struct { nodeInf cache.SharedInformer withNodeMetadata bool store cache.Store - logger log.Logger + logger *slog.Logger queue *workqueue.Type } // NewPod creates a new pod discovery. 
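One more pattern worth a sketch before the pod constructor: the earlier resolvePodRef and addServiceLabels hunks replace store.Get on a throwaway dummy object with store.GetByKey(namespacedName(ns, name)), where namespacedName is simply ns + "/" + name, the same key MetaNamespaceKeyFunc derives from a full object. BenchmarkResolvePodRef at the top of this diff presumably exists to demonstrate that the keyed path skips the dummy-object allocation. A standalone sketch using the test fixtures' names:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func main() {
	store := cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc)
	_ = store.Add(&apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "testpod", Namespace: "foo"},
	})

	// Equivalent to namespacedName("foo", "testpod") in the diff; no dummy
	// *apiv1.Pod is built just to derive the key.
	obj, exists, err := store.GetByKey("foo/testpod")
	fmt.Println(obj != nil, exists, err) // true true <nil>
}
```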
-func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { +func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd) @@ -81,7 +81,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if p.withNodeMetadata { @@ -95,12 +95,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo p.enqueuePodsForNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - p.enqueuePodsForNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + p.enqueuePodsForNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } } @@ -127,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(p.logger).Log("msg", "pod informer unable to sync cache") + p.logger.Error("pod informer unable to sync cache") } return } @@ -164,7 +167,7 @@ func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } pod, err := convertToPod(o) if err != nil { - level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err) + p.logger.Error("converting to Pod object failed", "err", err) return true } send(ctx, ch, p.buildPod(pod)) @@ -246,7 +249,7 @@ func (p *Pod) findPodContainerStatus(statuses *[]apiv1.ContainerStatus, containe func (p *Pod) findPodContainerID(statuses *[]apiv1.ContainerStatus, containerName string) string { cStatus, err := p.findPodContainerStatus(statuses, containerName) if err != nil { - level.Debug(p.logger).Log("msg", "cannot find container ID", "err", err) + p.logger.Debug("cannot find container ID", "err", err) return "" } return cStatus.ContainerID @@ -315,7 +318,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { func (p *Pod) enqueuePodsForNode(nodeName string) { pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err) + p.logger.Error("Error getting pods for node", "node", nodeName, "err", err) return } diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 51204a5a1a..e666497c86 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,16 +33,16 @@ import ( // Service implements discovery of Kubernetes services. type Service struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewService returns a new service discovery. 
-func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { +func NewService(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) @@ -71,7 +71,7 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } return s } @@ -91,7 +91,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(s.logger).Log("msg", "service informer unable to sync cache") + s.logger.Error("service informer unable to sync cache") } return } @@ -128,7 +128,7 @@ func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) b } eps, err := convertToService(o) if err != nil { - level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err) + s.logger.Error("converting to Service object failed", "err", err) return true } send(ctx, ch, s.buildService(eps)) diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 634a6b1d4b..dfc12417c0 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/linode/linodego" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -138,7 +138,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*linodeMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index 3c10650653..7bcaa05ba4 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -19,10 +19,10 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -238,7 +238,7 @@ func TestLinodeSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Endpoint()) require.NoError(t, err) diff --git a/discovery/manager.go b/discovery/manager.go index cefa90a866..87e0ecc44b 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -16,14 +16,14 @@ package discovery import ( "context" "fmt" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -81,9 +81,9 @@ func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]Discovere } // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } mgr := &Manager{ logger: logger, @@ -104,7 +104,7 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil { mgr.metrics = metrics } else { - level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) + logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err) return nil } @@ -141,7 +141,7 @@ func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) { // Manager maintains a set of discovery providers and sends each update to a map channel. // Targets are grouped by the target set name. 
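Before the Manager struct itself: the long run of manager_test.go edits below all reduce to one call-site change in NewManager. A hedged sketch of the new calling convention, using only identifiers visible in this diff:

```go
package main

import (
	"context"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/discovery"
)

func main() {
	reg := prometheus.NewRegistry()
	sdMetrics, err := discovery.CreateAndRegisterSDMetrics(reg)
	if err != nil {
		panic(err)
	}

	// NewManager now takes a *slog.Logger; it returns nil if its own
	// metrics fail to register, so the tests' require.NotNil checks are
	// load-bearing.
	mgr := discovery.NewManager(context.Background(), promslog.NewNopLogger(), reg, sdMetrics)
	if mgr == nil {
		panic("failed to create discovery manager")
	}

	// Normally runs for the lifetime of the process.
	go mgr.Run()
}
```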
type Manager struct { - logger log.Logger + logger *slog.Logger name string httpOpts []config.HTTPClientOption mtx sync.RWMutex @@ -294,7 +294,7 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D } func (m *Manager) startProvider(ctx context.Context, p *Provider) { - level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) + m.logger.Debug("Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) ctx, cancel := context.WithCancel(ctx) updates := make(chan []*targetgroup.Group) @@ -328,7 +328,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ case tgs, ok := <-updates: m.metrics.ReceivedUpdates.Inc() if !ok { - level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) + m.logger.Debug("Discoverer channel closed", "provider", p.name) // Wait for provider cancellation to ensure targets are cleaned up when expected. <-ctx.Done() return @@ -364,7 +364,7 @@ func (m *Manager) sender() { case m.syncCh <- m.allGroups(): default: m.metrics.DelayedUpdates.Inc() - level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") + m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: default: @@ -458,12 +458,12 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int { } typ := cfg.Name() d, err := cfg.NewDiscoverer(DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), + Logger: m.logger.With("discovery", typ, "config", setName), HTTPClientOptions: m.httpOpts, Metrics: m.sdMetrics[typ], }) if err != nil { - level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) + m.logger.Error("Cannot create service discovery", "err", err, "type", typ, "config", setName) failed++ return } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 831cefe514..b882c0b02e 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -675,7 +675,7 @@ func TestTargetUpdatesOrder(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond @@ -791,7 +791,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -828,7 +828,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager 
:= NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -868,7 +868,7 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -911,7 +911,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -979,7 +979,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1023,7 +1023,7 @@ func TestDiscovererConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1060,7 +1060,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1141,7 +1141,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1202,7 +1202,7 @@ func TestGaugeFailedConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1454,7 +1454,7 @@ func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1551,7 
+1551,7 @@ func TestUnregisterMetrics(t *testing.T) { refreshMetrics, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) // discoveryManager will be nil if there was an error configuring metrics. require.NotNil(t, discoveryManager) // Unregister all metrics. diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 38b47accff..f81a4410eb 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math/rand" "net" "net/http" @@ -27,7 +28,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -140,7 +140,7 @@ type Discovery struct { } // NewDiscovery returns a new Marathon Discovery. -func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*marathonMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 68f6fe3ccc..1a732c0502 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -16,6 +16,7 @@ package moby import ( "context" "fmt" + "log/slog" "net" "net/http" "net/url" @@ -28,7 +29,6 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -128,7 +128,7 @@ type DockerDiscovery struct { } // NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets. 
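NewDockerDiscovery below gets the same treatment, and the docker_test.go hunks show the full wiring. A sketch of a direct construction; the Host field name is an assumption inferred from the test's host: YAML key, and the socket address is purely illustrative:

```go
package main

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/moby"
)

func main() {
	// Host is assumed to be the exported counterpart of the `host` YAML key.
	cfg := moby.DockerSDConfig{Host: "unix:///var/run/docker.sock"}

	reg := prometheus.NewRegistry()
	refreshMetrics := discovery.NewRefreshMetrics(reg)
	metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
	if err := metrics.Register(); err != nil {
		panic(err)
	}
	defer metrics.Unregister()
	defer refreshMetrics.Unregister()

	// The logger argument is now a *slog.Logger, as in the tests.
	d, err := moby.NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics)
	_, _ = d, err
}
```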
-func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { +func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { m, ok := metrics.(*dockerMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index 398393a15a..00e6a3e4f3 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -19,9 +19,9 @@ import ( "sort" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -226,7 +226,7 @@ host: %s require.NoError(t, metrics.Register()) defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go index b0147467d2..9e93e581f3 100644 --- a/discovery/moby/dockerswarm.go +++ b/discovery/moby/dockerswarm.go @@ -16,13 +16,13 @@ package moby import ( "context" "fmt" + "log/slog" "net/http" "net/url" "time" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -125,7 +125,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dockerswarmMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/mock_test.go b/discovery/moby/mock_test.go index 3f35258c8f..7ef5cb07c3 100644 --- a/discovery/moby/mock_test.go +++ b/discovery/moby/mock_test.go @@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() { if len(query) == 2 { h := sha1.New() h.Write([]byte(query[1])) - // Avoing long filenames for Windows. + // Avoiding long filenames for Windows. 
f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10] } } diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go index 4ad1088d1a..973b83c4b6 100644 --- a/discovery/moby/nodes_test.go +++ b/discovery/moby/nodes_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go index 47ca69e33a..7a966cfeee 100644 --- a/discovery/moby/services_test.go +++ b/discovery/moby/services_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -349,7 +349,7 @@ filters: defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go index ef71bc02f5..59d8831c3b 100644 --- a/discovery/moby/tasks_test.go +++ b/discovery/moby/tasks_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go index d9c48120ae..1dbd8f1608 100644 --- a/discovery/nomad/nomad.go +++ b/discovery/nomad/nomad.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" nomad "github.com/hashicorp/nomad/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -121,7 +121,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*nomadMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go index 357d4a8e9b..32b087524c 100644 --- a/discovery/nomad/nomad_test.go +++ b/discovery/nomad/nomad_test.go @@ -21,9 +21,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -160,7 +160,7 @@ func TestNomadSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) tgs, err := d.refresh(context.Background()) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 8964da9294..ec127b1861 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -16,10 +16,10 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors" @@ -43,14 +43,14 @@ type HypervisorDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int availability gophercloud.Availability } // newHypervisorDiscovery returns a new hypervisor discovery. 
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, availability gophercloud.Availability, l log.Logger, + port int, region string, availability gophercloud.Availability, l *slog.Logger, ) *HypervisorDiscovery { return &HypervisorDiscovery{ provider: provider, authOpts: opts, diff --git a/discovery/openstack/hypervisor_test.go b/discovery/openstack/hypervisor_test.go index 45684b4a2e..e4a97f32cf 100644 --- a/discovery/openstack/hypervisor_test.go +++ b/discovery/openstack/hypervisor_test.go @@ -93,6 +93,5 @@ func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 78c669e6f7..2a9e29f2ef 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -16,17 +16,17 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/pagination" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" @@ -52,7 +52,7 @@ type InstanceDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int allTenants bool availability gophercloud.Availability @@ -60,10 +60,10 @@ type InstanceDiscovery struct { // NewInstanceDiscovery returns a new instance discovery. 
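The hypervisor test above (and the instance test shortly after) fold the require.Error plus require.Contains pair into a single require.ErrorContains, which asserts both that the error is non-nil and that its message contains the substring. A compact sketch of the same assertion in isolation:

```go
package example

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestCanceledRefresh(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// Stands in for the refresh(ctx) error the real tests inspect.
	err := ctx.Err()

	// One call instead of require.Error followed by require.Contains.
	require.ErrorContains(t, err, context.Canceled.Error())
}
```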
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger, + port int, region string, allTenants bool, availability gophercloud.Availability, l *slog.Logger, ) *InstanceDiscovery { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &InstanceDiscovery{ provider: provider, authOpts: opts, @@ -134,7 +134,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for _, s := range instanceList { if len(s.Addresses) == 0 { - level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Info("Got no IP address", "instance", s.ID) continue } @@ -151,7 +151,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if !nameOk { flavorID, idOk := s.Flavor["id"].(string) if !idOk { - level.Warn(i.logger).Log("msg", "Invalid type for both flavor original_name and flavor id, expected string") + i.logger.Warn("Invalid type for both flavor original_name and flavor id, expected string") continue } labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID) @@ -171,22 +171,22 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for pool, address := range s.Addresses { md, ok := address.([]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected array") + i.logger.Warn("Invalid type for address, expected array") continue } if len(md) == 0 { - level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Debug("Got no IP address", "instance", s.ID) continue } for _, address := range md { md1, ok := address.(map[string]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict") + i.logger.Warn("Invalid type for address, expected dict") continue } addr, ok := md1["addr"].(string) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected string") + i.logger.Warn("Invalid type for address, expected string") continue } if _, ok := floatingIPPresent[addr]; ok { diff --git a/discovery/openstack/instance_test.go b/discovery/openstack/instance_test.go index 2b5ac1b89e..2617baa4e3 100644 --- a/discovery/openstack/instance_test.go +++ b/discovery/openstack/instance_test.go @@ -134,6 +134,5 @@ func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go index c98f78788d..fa7e0cce90 100644 --- a/discovery/openstack/openstack.go +++ b/discovery/openstack/openstack.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "time" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/mwitkow/go-conntrack" @@ -142,7 +142,7 @@ type refresher interface { } // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets. 
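The instance refresh above walks Nova's loosely typed Addresses payload through a chain of type assertions, logging one of the "Invalid type" warnings and skipping the entry whenever an assertion fails. A sketch of the shape it expects, with the skip points marked by the corresponding log messages:

```go
package main

import "fmt"

func main() {
	// Pool name -> list of address dicts, each carrying an "addr" string.
	addresses := map[string]interface{}{
		"private": []interface{}{
			map[string]interface{}{"addr": "10.0.0.1"},
		},
	}

	for pool, address := range addresses {
		md, ok := address.([]interface{})
		if !ok {
			continue // "Invalid type for address, expected array"
		}
		for _, a := range md {
			md1, ok := a.(map[string]interface{})
			if !ok {
				continue // "Invalid type for address, expected dict"
			}
			addr, ok := md1["addr"].(string)
			if !ok {
				continue // "Invalid type for address, expected string"
			}
			fmt.Println(pool, addr)
		}
	}
}
```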
-func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*openstackMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -163,7 +163,7 @@ func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetr ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { var opts gophercloud.AuthOptions if conf.IdentityEndpoint == "" { var err error diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index a70857a08b..15bb9809c9 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -55,10 +54,10 @@ type dedicatedServer struct { type dedicatedServerDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newDedicatedServerDiscovery(conf *SDConfig, logger log.Logger) *dedicatedServerDiscovery { +func newDedicatedServerDiscovery(conf *SDConfig, logger *slog.Logger) *dedicatedServerDiscovery { return &dedicatedServerDiscovery{config: conf, logger: logger} } @@ -115,10 +114,7 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou for _, dedicatedServerName := range dedicatedServerList { dedicatedServer, err := getDedicatedServerDetails(client, dedicatedServerName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) continue } dedicatedServerDetailedList = append(dedicatedServerDetailedList, *dedicatedServer) diff --git a/discovery/ovhcloud/dedicated_server_test.go b/discovery/ovhcloud/dedicated_server_test.go index 52311bcc87..f9dbd6af9c 100644 --- a/discovery/ovhcloud/dedicated_server_test.go +++ b/discovery/ovhcloud/dedicated_server_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -41,7 +41,7 @@ application_secret: %s consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecretTest, ovhcloudConsumerKeyTest) require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go index 988b4482f2..08ed70296b 100644 --- a/discovery/ovhcloud/ovhcloud.go +++ b/discovery/ovhcloud/ovhcloud.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/netip" "time" - "github.com/go-kit/log" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -137,7 +137,7 @@ func parseIPList(ipList []string) ([]netip.Addr, 
error) { return ipAddresses, nil } -func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) { switch conf.Service { case "vps": return newVpsDiscovery(conf, logger), nil @@ -148,7 +148,7 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { } // NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ovhcloudMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go index 9c95bf90e6..84a35af3ad 100644 --- a/discovery/ovhcloud/ovhcloud_test.go +++ b/discovery/ovhcloud/ovhcloud_test.go @@ -20,11 +20,11 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/util/testutil" ) var ( @@ -121,7 +121,7 @@ func TestParseIPs(t *testing.T) { func TestDiscoverer(t *testing.T) { conf, _ := getMockConf("vps") - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() refreshMetrics := discovery.NewRefreshMetrics(reg) diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index 58ceeabd87..7050f826a5 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -68,10 +67,10 @@ type virtualPrivateServer struct { type vpsDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newVpsDiscovery(conf *SDConfig, logger log.Logger) *vpsDiscovery { +func newVpsDiscovery(conf *SDConfig, logger *slog.Logger) *vpsDiscovery { return &vpsDiscovery{config: conf, logger: logger} } @@ -133,10 +132,7 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { for _, vpsName := range vpsList { vpsDetailed, err := getVpsDetails(client, vpsName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) continue } vpsDetailedList = append(vpsDetailedList, *vpsDetailed) diff --git a/discovery/ovhcloud/vps_test.go b/discovery/ovhcloud/vps_test.go index 2d2d6dcd21..00d59da7f0 100644 --- a/discovery/ovhcloud/vps_test.go +++ b/discovery/ovhcloud/vps_test.go @@ -23,8 +23,8 @@ import ( yaml "gopkg.in/yaml.v2" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, 
promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8f89acbf93..6122a76da7 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "net/url" @@ -27,11 +28,11 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/discovery" @@ -138,14 +139,14 @@ type Discovery struct { } // NewDiscovery returns a new PuppetDB discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*puppetdbMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http") diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go index bf9c7b215e..4585b78223 100644 --- a/discovery/puppetdb/puppetdb_test.go +++ b/discovery/puppetdb/puppetdb_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -70,7 +70,7 @@ func TestPuppetSlashInURL(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) require.Equal(t, apiURL, d.url) @@ -94,7 +94,7 @@ func TestPuppetDBRefresh(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -142,7 +142,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -201,7 +201,7 @@ func TestPuppetDBInvalidCode(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -229,7 +229,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx 
:= context.Background() diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go index f037a90cff..31646c0e4c 100644 --- a/discovery/refresh/refresh.go +++ b/discovery/refresh/refresh.go @@ -16,17 +16,17 @@ package refresh import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) type Options struct { - Logger log.Logger + Logger *slog.Logger Mech string Interval time.Duration RefreshF func(ctx context.Context) ([]*targetgroup.Group, error) @@ -35,7 +35,7 @@ type Options struct { // Discovery implements the Discoverer interface. type Discovery struct { - logger log.Logger + logger *slog.Logger interval time.Duration refreshf func(ctx context.Context) ([]*targetgroup.Group, error) metrics *discovery.RefreshMetrics @@ -45,9 +45,9 @@ type Discovery struct { func NewDiscovery(opts Options) *Discovery { m := opts.MetricsInstantiator.Instantiate(opts.Mech) - var logger log.Logger + var logger *slog.Logger if opts.Logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } else { logger = opts.Logger } @@ -68,7 +68,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } } else { select { @@ -87,7 +87,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } continue } diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go index f8e1a83f5e..670e439c4f 100644 --- a/discovery/scaleway/scaleway.go +++ b/discovery/scaleway/scaleway.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "os" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -185,7 +185,7 @@ func init() { // the Discoverer interface. type Discovery struct{} -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*scalewayMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go index 675149f2a3..7b3b18f471 100644 --- a/discovery/triton/triton.go +++ b/discovery/triton/triton.go @@ -19,12 +19,12 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strings" "time" - "github.com/go-kit/log" "github.com/mwitkow/go-conntrack" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -146,7 +146,7 @@ type Discovery struct { } // New returns a new Discovery which periodically refreshes its targets. 
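Not part of the patch: the refresh.go hunk above is the template for every discoverer in this change, so here is a minimal sketch, assuming the Options fields shown in that hunk (the "example" mech name, interval, and empty refresh function are hypothetical), of wiring a *slog.Logger through refresh.NewDiscovery:

```go
// Hypothetical illustration only; not part of this diff.
package example

import (
	"context"
	"log/slog"
	"os"
	"time"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func newExampleDiscovery(mi discovery.RefreshMetricsInstantiator) *refresh.Discovery {
	return refresh.NewDiscovery(refresh.Options{
		// A nil Logger is also fine: NewDiscovery falls back to
		// promslog.NewNopLogger(), as the hunk above shows.
		Logger:   slog.New(slog.NewTextHandler(os.Stderr, nil)),
		Mech:     "example",
		Interval: 60 * time.Second,
		RefreshF: func(_ context.Context) ([]*targetgroup.Group, error) {
			// A real refresher would query its SD API here.
			return nil, nil
		},
		MetricsInstantiator: mi,
	})
}
```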
-func New(logger log.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func New(logger *slog.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*tritonMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go index e37693e6bf..b2d06afaf6 100644 --- a/discovery/triton/triton_test.go +++ b/discovery/triton/triton_test.go @@ -21,7 +21,6 @@ import ( "net/http/httptest" "net/url" "strconv" - "strings" "testing" "github.com/prometheus/client_golang/prometheus" @@ -182,8 +181,7 @@ func TestTritonSDRefreshNoServer(t *testing.T) { td, m, _ := newTritonDiscovery(conf) _, err := td.refresh(context.Background()) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint")) + require.ErrorContains(t, err, "an error occurred when requesting targets from the discovery endpoint") m.Unregister() } @@ -193,8 +191,7 @@ func TestTritonSDRefreshCancelled(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := td.refresh(ctx) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), context.Canceled.Error())) + require.ErrorContains(t, err, context.Canceled.Error()) m.Unregister() } diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index c8af2f1587..de806895d7 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "net/url" "path" @@ -24,7 +25,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/kolo/xmlrpc" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -41,10 +41,10 @@ const ( uyuniMetaLabelPrefix = model.MetaLabelPrefix + "uyuni_" uyuniLabelMinionHostname = uyuniMetaLabelPrefix + "minion_hostname" uyuniLabelPrimaryFQDN = uyuniMetaLabelPrefix + "primary_fqdn" - uyuniLablelSystemID = uyuniMetaLabelPrefix + "system_id" - uyuniLablelGroups = uyuniMetaLabelPrefix + "groups" - uyuniLablelEndpointName = uyuniMetaLabelPrefix + "endpoint_name" - uyuniLablelExporter = uyuniMetaLabelPrefix + "exporter" + uyuniLabelSystemID = uyuniMetaLabelPrefix + "system_id" + uyuniLabelGroups = uyuniMetaLabelPrefix + "groups" + uyuniLabelEndpointName = uyuniMetaLabelPrefix + "endpoint_name" + uyuniLabelExporter = uyuniMetaLabelPrefix + "exporter" uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module" uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path" uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme" @@ -109,7 +109,7 @@ type Discovery struct { entitlement string separator string interval time.Duration - logger log.Logger + logger *slog.Logger } // NewDiscovererMetrics implements discovery.Config. @@ -212,7 +212,7 @@ func getEndpointInfoForSystems( } // NewDiscovery returns a uyuni discovery for the given configuration. 
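The triton and xds test hunks in this patch collapse the `require.Error` + `strings.Contains` pair into a single assertion. A self-contained sketch of the equivalence (hypothetical test, not from the diff):

```go
package example

import (
	"errors"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestErrorContainsEquivalence(t *testing.T) {
	err := errors.New("an error occurred when requesting targets")

	// Old style: two assertions, with an unhelpful "expected true" message
	// when the substring check fails.
	require.Error(t, err)
	require.True(t, strings.Contains(err.Error(), "requesting targets"))

	// New style: one assertion that prints the actual error on failure.
	require.ErrorContains(t, err, "requesting targets")
}
```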
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*uyuniMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -270,10 +270,10 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), - uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), - uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), - uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), + uyuniLabelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), + uyuniLabelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), + uyuniLabelEndpointName: model.LabelValue(endpoint.EndpointName), + uyuniLabelExporter: model.LabelValue(endpoint.ExporterName), uyuniLabelProxyModule: model.LabelValue(endpoint.Module), uyuniLabelMetricsPath: model.LabelValue(endpoint.Path), uyuniLabelScheme: model.LabelValue(scheme), diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go index aaa9c64e47..f82b22168a 100644 --- a/discovery/vultr/vultr.go +++ b/discovery/vultr/vultr.go @@ -16,13 +16,13 @@ package vultr import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -114,7 +114,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
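For callers outside the test files, a sketch (assumptions: promslog's zero-value Config selects its logfmt/info defaults, and the "discovery" attribute key is arbitrary) of building the *slog.Logger these constructors now accept:

```go
package example

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// newSDLogger builds a *slog.Logger via the promslog package that this
// patch adopts in place of go-kit/log.
func newSDLogger(component string) *slog.Logger {
	// An empty Config is assumed to pick promslog's defaults
	// (logfmt output at info level on stderr).
	logger := promslog.New(&promslog.Config{})
	return logger.With("discovery", component)
}
```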
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*vultrMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go index 2f12a35529..00ef21e38c 100644 --- a/discovery/vultr/vultr_test.go +++ b/discovery/vultr/vultr_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestVultrSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdMock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/xds/client_test.go b/discovery/xds/client_test.go index b699995fb7..2cf5b2f9cb 100644 --- a/discovery/xds/client_test.go +++ b/discovery/xds/client_test.go @@ -52,16 +52,14 @@ func TestMakeXDSResourceHttpEndpointEmptyServerURLScheme(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Equal(t, "invalid xDS server URL", err.Error()) + require.EqualError(t, err, "invalid xDS server URL") } func TestMakeXDSResourceHttpEndpointEmptyServerURLHost(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("grpc://127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Contains(t, err.Error(), "must be either 'http' or 'https'") + require.ErrorContains(t, err, "must be either 'http' or 'https'") } func TestMakeXDSResourceHttpEndpoint(t *testing.T) { diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index d1d540aaf4..55b3d628e5 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -15,14 +15,14 @@ package xds import ( "fmt" + "log/slog" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "google.golang.org/protobuf/types/known/anypb" "github.com/prometheus/prometheus/discovery" @@ -99,7 +99,7 @@ func (c *KumaSDConfig) SetDirectory(dir string) { func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { logger := opts.Logger if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return NewKumaHTTPDiscovery(c, logger, opts.Metrics) @@ -158,7 +158,7 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L return targets, nil } -func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { +func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { m, ok := metrics.(*xdsMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -170,7 +170,7 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger 
log.Logger, metrics discove var err error clientID, err = osutil.GetFQDN() if err != nil { - level.Debug(logger).Log("msg", "error getting FQDN", "err", err) + logger.Debug("error getting FQDN", "err", err) clientID = "prometheus" } } diff --git a/discovery/xds/kuma_mads.pb.go b/discovery/xds/kuma_mads.pb.go index b1079bf23f..210a5343a4 100644 --- a/discovery/xds/kuma_mads.pb.go +++ b/discovery/xds/kuma_mads.pb.go @@ -23,13 +23,14 @@ package xds import ( context "context" + reflect "reflect" + sync "sync" + v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" _ "github.com/envoyproxy/protoc-gen-validate/validate" _ "google.golang.org/genproto/googleapis/api/annotations" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" ) const ( diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index cfb9cbac50..23d754c4b7 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -201,9 +201,8 @@ func TestKumaMadsV1ResourceParserInvalidResources(t *testing.T) { }} groups, err := kumaMadsV1ResourceParser(resources, KumaMadsV1ResourceTypeURL) require.Nil(t, groups) - require.Error(t, err) - require.Contains(t, err.Error(), "cannot parse") + require.ErrorContains(t, err, "cannot parse") } func TestNewKumaHTTPDiscovery(t *testing.T) { diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 8191d6be1a..db55a2b6f7 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -15,11 +15,10 @@ package xds import ( "context" + "log/slog" "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "google.golang.org/protobuf/encoding/protojson" @@ -104,7 +103,7 @@ type fetchDiscovery struct { refreshInterval time.Duration parseResources resourceParser - logger log.Logger + logger *slog.Logger metrics *xdsMetrics } @@ -140,7 +139,7 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou } if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } @@ -153,12 +152,12 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou parsedTargets, err := d.parseResources(response.Resources, response.TypeUrl) if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } - level.Debug(d.logger).Log("msg", "Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) + d.logger.Debug("Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) select { case <-ctx.Done(): diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go index 7cce021c5f..db10adc1a2 100644 --- a/discovery/xds/xds_test.go +++ b/discovery/xds/xds_test.go @@ -22,9 +22,9 @@ import ( "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "google.golang.org/protobuf/types/known/anypb" @@ -90,7 +90,7 @@ func constantResourceParser(targets []model.LabelSet, 
err error) resourceParser } } -var nopLogger = log.NewNopLogger() +var nopLogger = promslog.NewNopLogger() type testResourceClient struct { resourceTypeURL string diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 92904dd71c..a1cfe3d055 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -18,15 +18,16 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -146,16 +147,16 @@ type Discovery struct { treeCaches []*treecache.ZookeeperTreeCache parse func(data []byte, path string) (model.LabelSet, error) - logger log.Logger + logger *slog.Logger } // NewNerveDiscovery returns a new Discovery for the given Nerve config. -func NewNerveDiscovery(conf *NerveSDConfig, logger log.Logger) (*Discovery, error) { +func NewNerveDiscovery(conf *NerveSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseNerveMember) } // NewServersetDiscovery returns a new Discovery for the given serverset config. -func NewServersetDiscovery(conf *ServersetSDConfig, logger log.Logger) (*Discovery, error) { +func NewServersetDiscovery(conf *ServersetSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseServersetMember) } @@ -165,11 +166,11 @@ func NewDiscovery( srvs []string, timeout time.Duration, paths []string, - logger log.Logger, + logger *slog.Logger, pf func(data []byte, path string) (model.LabelSet, error), ) (*Discovery, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } conn, _, err := zk.Connect( diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 46c3e0b84b..a179a2f9f1 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -21,6 +21,7 @@ The Prometheus monitoring server | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | | --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` | | --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` | +| --web.max-notifications-subscribers | Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close. | `16` | | --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | | | --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | | | --web.user-assets | Path to static asset directory, available at /user. | | @@ -28,6 +29,7 @@ The Prometheus monitoring server | --web.enable-admin-api | Enable API endpoints for admin control actions. 
| `false` | | --web.enable-remote-write-receiver | Enable API endpoint accepting remote write requests. | `false` | | --web.remote-write-receiver.accepted-protobuf-messages | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` | +| --web.enable-otlp-receiver | Enable API endpoint accepting OTLP write requests. | `false` | | --web.console.templates | Path to the console template directory, available at /consoles. | `consoles` | | --web.console.libraries | Path to the console library directory. | `console_libraries` | | --web.page-title | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` | @@ -56,7 +58,7 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index e48cede79c..5e2a8f6bb1 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system. | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | --version | Show application version. | | --experimental | Enable experimental commands. | -| --enable-feature ... | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. | +| --enable-feature ... | Comma separated feature names to enable. Currently unused. | @@ -462,6 +462,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --debug | Enable unit test debugging. 
| `false` | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 3c1ec84f0f..cd33dba8e3 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -21,10 +21,13 @@ An example rules file with an alert would be: ```yaml groups: - name: example + labels: + team: myteam rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m + keep_firing_for: 5m labels: severity: page annotations: @@ -38,6 +41,13 @@ the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. Alerting rules without the `for` clause will become active on the first evaluation. +There is also an optional `keep_firing_for` clause that tells Prometheus to keep +this alert firing for the specified duration after the firing condition was last met. +This can be used to prevent situations such as flapping alerts, false resolutions +due to lack of data, etc. Alerting rules without the `keep_firing_for` clause +will deactivate on the first evaluation where the condition is not met (assuming +any optional `for` duration described above has been satisfied). + The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 80bb3afee9..2093ed8836 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -71,12 +71,19 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] - # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. - # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. + # Offset the rule evaluation timestamp of this particular group by the + # specified duration into the past to ensure the underlying metrics have + # been received. Metric availability delays are more likely to occur when + # Prometheus is running as a remote write target, but can also occur when + # there are anomalies with scraping. [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). + # external systems (federation, remote storage, Alertmanager). + # Environment variable references `${var}` or `$var` are replaced according + # to the values of the current environment variables. + # References to undefined variables are replaced by the empty string. + # The `$` character can be escaped by using `$$`. external_labels: [ : ... ] @@ -94,27 +101,29 @@ global: # change or be removed in the future. [ body_size_limit: | default = 0 ] - # Per-scrape limit on number of scraped samples that will be accepted. + # Per-scrape limit on the number of scraped samples that will be accepted. # If more than this number of samples are present after metric relabeling # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: | default = 0 ] - # Per-scrape limit on number of labels that will be accepted for a sample.
If - # more than this number of labels are present post metric-relabeling, the - # entire scrape will be treated as failed. 0 means no limit. + # Limit on the number of labels that will be accepted per sample. If more + # than this number of labels are present on any sample post metric-relabeling, + # the entire scrape will be treated as failed. 0 means no limit. [ label_limit: | default = 0 ] - # Per-scrape limit on length of labels name that will be accepted for a sample. - # If a label name is longer than this number post metric-relabeling, the entire - # scrape will be treated as failed. 0 means no limit. + # Limit on the length (in bytes) of each individual label name. If any label + # name in a scrape is longer than this number post metric-relabeling, the + # entire scrape will be treated as failed. Note that label names are UTF-8 + # encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_name_length_limit: | default = 0 ] - # Per-scrape limit on length of labels value that will be accepted for a sample. - # If a label value is longer than this number post metric-relabeling, the - # entire scrape will be treated as failed. 0 means no limit. + # Limit on the length (in bytes) of each individual label value. If any label + # value in a scrape is longer than this number post metric-relabeling, the + # entire scrape will be treated as failed. Note that label values are UTF-8 + # encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_value_length_limit: | default = 0 ] - # Per-scrape config limit on number of unique targets that will be + # Limit per scrape config on number of unique targets that will be # accepted. If more than this number of targets are present after target # relabeling, Prometheus will mark the targets as failed without scraping them. # 0 means no limit. This is an experimental feature, this behaviour could @@ -203,12 +212,18 @@ job_name: # The protocols to negotiate during a scrape with the client. # Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, -# OpenMetricsText1.0.0, PrometheusText0.0.4. +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. [ scrape_protocols: [, ...] | default = ] -# Whether to scrape a classic histogram that is also exposed as a native +# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise +# invalid Content-Type. +# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. +[ fallback_scrape_protocol: ] + +# Whether to scrape a classic histogram, even if it is also exposed as a native # histogram (has no effect without --enable-feature=native-histograms). -[ scrape_classic_histograms: | default = false ] +[ always_scrape_classic_histograms: | default = false ] # The HTTP resource path on which to fetch metrics from targets. [ metrics_path: | default = /metrics ] @@ -264,69 +279,14 @@ params: # response from the scraped target. [ enable_compression: | default = true ] -# Sets the `Authorization` header on every scrape request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Sets the `Authorization` header on every scrape request with -# the configured credentials. -authorization: - # Sets the authentication type of the request. - [ type: | default: Bearer ] - # Sets the credentials of the request. 
It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials of the request with the credentials read from the - # configured file. It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether scrape requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - # File to which scrape failures are logged. # Reloading the configuration will reopen the file. [ scrape_failure_log_file: ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] + # List of Azure service discovery configurations. azure_sd_configs: [ - ... ] @@ -454,34 +414,36 @@ metric_relabel_configs: # change or be removed in the future. [ body_size_limit: | default = 0 ] -# Per-scrape limit on number of scraped samples that will be accepted. +# Per-scrape limit on the number of scraped samples that will be accepted. # If more than this number of samples are present after metric relabeling # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: | default = 0 ] -# Per-scrape limit on number of labels that will be accepted for a sample. If -# more than this number of labels are present post metric-relabeling, the -# entire scrape will be treated as failed. 0 means no limit. +# Limit on the number of labels that will be accepted per sample. If more +# than this number of labels are present on any sample post metric-relabeling, +# the entire scrape will be treated as failed. 0 means no limit. [ label_limit: | default = 0 ] -# Per-scrape limit on length of labels name that will be accepted for a sample. -# If a label name is longer than this number post metric-relabeling, the entire -# scrape will be treated as failed. 0 means no limit. +# Limit on the length (in bytes) of each individual label name. If any label +# name in a scrape is longer than this number post metric-relabeling, the +# entire scrape will be treated as failed. Note that label names are UTF-8 +# encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_name_length_limit: | default = 0 ] -# Per-scrape limit on length of labels value that will be accepted for a sample. -# If a label value is longer than this number post metric-relabeling, the -# entire scrape will be treated as failed. 0 means no limit. 
+# Limit on the length (in bytes) of each individual label value. If any label +# value in a scrape is longer than this number post metric-relabeling, the +# entire scrape will be treated as failed. Note that label values are UTF-8 +# encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_value_length_limit: | default = 0 ] -# Per-scrape config limit on number of unique targets that will be +# Limit per scrape config on number of unique targets that will be # accepted. If more than this number of targets are present after target # relabeling, Prometheus will mark the targets as failed without scraping them. # 0 means no limit. This is an experimental feature, this behaviour could # change in the future. [ target_limit: | default = 0 ] -# Per-job limit on the number of targets dropped by relabeling +# Limit per scrape config on the number of targets dropped by relabeling # that will be kept in memory. 0 means no limit. [ keep_dropped_targets: | default = 0 ] @@ -540,6 +502,73 @@ metric_relabel_configs: Where `` must be unique across all scrape configurations. +### `` + +An `http_config` allows configuring HTTP requests. + +``` +# Sets the `Authorization` header on every request with the +# configured username and password. +# username and username_file are mutually exclusive. +# password and password_file are mutually exclusive. +basic_auth: + [ username: ] + [ username_file: ] + [ password: ] + [ password_file: ] + +# Sets the `Authorization` header on every request with +# the configured credentials. +authorization: + # Sets the authentication type of the request. + [ type: | default: Bearer ] + # Sets the credentials of the request. It is mutually exclusive with + # `credentials_file`. + [ credentials: ] + # Sets the credentials of the request with the credentials read from the + # configured file. It is mutually exclusive with `credentials`. + [ credentials_file: ] + +# Optional OAuth 2.0 configuration. +# Cannot be used at the same time as basic_auth or authorization. +oauth2: + [ ] + +# Configure whether requests follow HTTP 3xx redirects. +[ follow_redirects: | default = true ] + +# Whether to enable HTTP2. +[ enable_http2: | default: true ] + +# Configures the request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] +# Comma-separated string that can contain IPs, CIDR notation, domain names +# that should be excluded from proxying. IP and domain names can +# contain port numbers. +[ no_proxy: ] +# Use proxy URL indicated by environment variables (HTTP_PROXY, http_proxy, HTTPS_PROXY, https_proxy, and no_proxy) +[ proxy_from_environment: | default: false ] +# Specifies headers to send to proxies during CONNECT requests. +[ proxy_connect_header: + [ : [, ...] ] ] + +# Custom HTTP headers to be sent along with each request. +# Headers that are set by Prometheus itself can't be overwritten. +http_headers: + # Header name. + [ : + # Header values. + [ values: [, ...] ] + # Header values. Hidden in configuration page. + [ secrets: [, ...] ] + # Files to read header values from. + [ files: [, ...] ] ] +``` + ### `` A `tls_config` allows configuring TLS connections. @@ -681,65 +710,9 @@ subscription_id: # instead be specified in the relabeling rule. [ port: | default = 80 ] -# Authentication information used to authenticate to the Azure API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive.
- -# Optional HTTP basic authentication information, currently not support by Azure. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by Azure. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by Azure. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -785,14 +758,17 @@ The following meta labels are available on targets during [relabeling](#relabel_ services: [ - ] -# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more -# about the possible filters that can be used. +# A Consul Filter expression used to filter the catalog results +# See https://www.consul.io/api-docs/catalog#list-services to know more +# about the filter expressions that can be used. +[ filter: ] +# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`. # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list. tags: [ - ] -# Node metadata key/value pairs to filter nodes for a given service. +# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead. [ node_meta: [ : ... ] ] @@ -806,65 +782,9 @@ tags: # On large setup it might be a good idea to increase this value because the catalog will change all the time. [ refresh_interval: | default = 30s ] -# Authentication information used to authenticate to the consul server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. 
- [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` Note that the IP number and port used to scrape the targets is assembled as @@ -904,72 +824,15 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_digitalocean_vpc`: the id of the droplet's VPC ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, not currently supported by DigitalOcean. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. 
-[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the droplets are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -1001,34 +864,6 @@ See below for the configuration options for Docker discovery: # Address of the Docker daemon. host: -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from, when `role` is nodes, and for discovered # tasks and services that don't have published ports. [ port: | default = 80 ] @@ -1052,39 +887,9 @@ tls_config: # The time after which the containers are refreshed. [ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1193,34 +998,6 @@ See below for the configuration options for Docker Swarm discovery: # Address of the Docker daemon. host: -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] 
] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - # Role of the targets to retrieve. Must be `services`, `tasks`, or `nodes`. role: @@ -1241,39 +1018,9 @@ role: # The time after which the service discovery data is refreshed. [ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1388,65 +1135,9 @@ filters: [ - name: values: , [...] ] -# Authentication information used to authenticate to the EC2 API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information, currently not supported by AWS. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by AWS. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutuall exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by AWS. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] 
] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1675,63 +1366,9 @@ query: # The port to scrape metrics from. [ port: | default = 80 ] -# TLS configuration to connect to the PuppetDB. -tls_config: - [ ] - -# basic_auth, authorization, and oauth2, are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# `Authorization` HTTP header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [this example Prometheus configuration file](/documentation/examples/prometheus-puppetdb.yml) @@ -1912,74 +1549,15 @@ The labels below are only available for targets with `role` set to `robot`: # One of robot or hcloud. role: -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, required when role is robot -# Role hcloud does not support basic auth. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, required when role is -# hcloud. Role robot does not support bearer token authentication. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. 
- [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the servers are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2021,65 +1599,9 @@ url: # Refresh interval to re-query the endpoint. [ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the API server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. 
-[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2113,74 +1635,15 @@ following meta labels are available on all targets during # The unique ID of the data center. datacenter_id: -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, required when using IONOS -# Cloud username and password as authentication method. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, required when using IONOS -# Cloud token as authentication method. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the servers are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2366,66 +1829,6 @@ role: # Note that api_server and kube_config are mutually exclusive. [ kubeconfig_file: ] -# Optional authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. 
- [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # Optional namespace discovery. If omitted, all namespaces are used. namespaces: own_namespace: @@ -2455,6 +1858,10 @@ attach_metadata: # Attaches node metadata to discovered targets. Valid for roles: pod, endpoints, endpointslice. # When set to true, Prometheus must have permissions to get Nodes. [ node: | default = false ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml) @@ -2495,66 +1902,9 @@ server: # The time after which the monitoring assignments are refreshed. [ fetch_timeout: | default = 2m ] -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional the `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. 
-# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful way @@ -2608,65 +1958,9 @@ See below for the configuration options for Lightsail discovery: # instead be specified in the relabeling rule. [ port: | default = 80 ] -# Authentication information used to authenticate to the Lightsail API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information, currently not supported by AWS. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by AWS. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutuall exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by AWS. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2677,6 +1971,8 @@ This service discovery uses the public IPv4 address by default, by that can be changed with relabeling, as demonstrated in [the Prometheus linode-sd configuration file](/documentation/examples/prometheus-linode.yml). +Linode APIv4 Token must be created with scopes: `linodes:read_only`, `ips:read_only`, and `events:read_only`. 
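+
+As a minimal sketch (the token value below is a placeholder), such a token is
+typically supplied through the HTTP client settings, for example:
+
+```yaml
+linode_sd_configs:
+  - authorization:
+      credentials: "<linode_apiv4_token>"
+```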
+ The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_linode_instance_id`: the id of the linode instance @@ -2704,71 +2000,10 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_linode_ipv6_ranges`: a list of IPv6 ranges with mask assigned to the linode instance joined by the tag separator ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. -# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only', 'ips:read_only', and 'events:read_only' - -# Optional HTTP basic authentication information, not currently supported by Linode APIv4. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional the `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] # Optional region to filter on. [ region: ] -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] @@ -2777,6 +2012,10 @@ tls_config: # The time after which the linode instances are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2817,67 +2056,9 @@ servers: # It is mutually exclusive with `auth_token` and other authentication mechanisms. [ auth_token_file: ] -# Sets the `Authorization` header on every request with the -# configured username and password. -# This is mutually exclusive with other authentication mechanisms. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -# NOTE: The current version of DC/OS marathon (v1.11.0) does not support -# standard `Authentication` header, use `auth_token` or `auth_token_file` -# instead. -authorization: - # Sets the authentication type. 
- [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration for connecting to marathon servers -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` By default every app listed in Marathon will be scraped by Prometheus. If not all @@ -2939,65 +2120,9 @@ The following meta labels are available on targets during [relabeling](#relabel_ [ server: ] [ tag_separator: | default = ,] -# Authentication information used to authenticate to the nomad server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. 
-[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -3135,66 +2260,12 @@ See below for the configuration options for Eureka discovery: # The URL to connect to the Eureka server. server: -# Sets the `Authorization` header on every request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - # Refresh interval to re-read the app instance list. [ refresh_interval: | default = 30s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [the Prometheus eureka-sd configuration file](/documentation/examples/prometheus-eureka.yml) @@ -3295,39 +2366,9 @@ tags_filter: # Refresh interval to re-read the targets list. [ refresh_interval: | default = 60s ] -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. 
-http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -3367,61 +2408,9 @@ password: # Refresh interval to re-read the managed targets list. [ refresh_interval: | default = 60s ] -# Optional HTTP basic authentication information, currently not supported by Uyuni. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by Uyuni. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by Uyuni. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [the Prometheus uyuni-sd configuration file](/documentation/examples/prometheus-uyuni.yml) @@ -3456,72 +2445,15 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_vultr_instance_allowed_bandwidth_gb` : Monthly bandwidth quota in GB. ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, not currently supported by Vultr. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. 
- [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the instances are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` @@ -3673,25 +2605,6 @@ through the `__alerts_path__` label. # Configures the protocol scheme used for requests. [ scheme: | default = http ] -# Sets the `Authorization` header on every request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - # Optionally configures AWS's Signature Verification 4 signing process to sign requests. # Cannot be set at the same time as basic_auth, authorization, oauth2, azuread or google_iam. # To use the default credentials from the AWS SDK, use `sigv4: {}`. @@ -3711,44 +2624,9 @@ sigv4: # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. 
- [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] # List of Azure service discovery configurations. azure_sd_configs: @@ -3919,25 +2797,6 @@ write_relabel_configs: # For the `io.prometheus.write.v2.Request` message, this option is noop (always true). [ send_native_histograms: | default = false ] -# Sets the `Authorization` header on every remote write request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default = Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - # Optionally configures AWS's Signature Verification 4 signing process to # sign requests. Cannot be set at the same time as basic_auth, authorization, oauth2, or azuread. # To use the default credentials from the AWS SDK, use `sigv4: {}`. @@ -3957,11 +2816,6 @@ sigv4: # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth, authorization, sigv4, azuread or google_iam. -oauth2: - [ ] - # Optional AzureAD configuration. # Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or google_iam. azuread: @@ -3988,43 +2842,9 @@ azuread: # Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or azuread. # To use the default credentials from the Google Cloud SDK, use `google_iam: {}`. google_iam: - # Service account key with monitoring write permessions. + # Service account key with monitoring write permissions. credentials_file: -# Configures the remote write request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default = false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default = true ] - # Configures the queue used to write to remote storage. 
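+# Samples are buffered in memory per shard and sent by parallel shard senders,
+# so the settings below trade memory use against throughput and resilience to
+# slow or unavailable remote endpoints.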
queue_config: # Number of samples to buffer per shard before we block reading of more @@ -4066,6 +2886,11 @@ metadata_config: [ send_interval: | default = 1m ] # Maximum number of samples per send. [ max_samples_per_send: | default = 500] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +# enable_http2 defaults to false for remote-write. +[ ] ``` There is a list of @@ -4100,66 +2925,12 @@ headers: # the local storage should have complete data for. [ read_recent: | default = false ] -# Sets the `Authorization` header on every remote read request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the remote read request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - # Whether to use the external labels as selectors for the remote read endpoint. [ filter_external_labels: | default = true ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` There is a list of @@ -4170,8 +2941,6 @@ with this feature. `tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. -NOTE: Out-of-order ingestion is an experimental feature, but you do not need any additional flag to enable it. Setting `out_of_order_time_window` to a positive duration enables it. - ```yaml # Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. 
 # An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp
diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md
index 9aa226bbc0..9a8e7a70c9 100644
--- a/docs/configuration/recording_rules.md
+++ b/docs/configuration/recording_rules.md
@@ -89,6 +89,11 @@ name: <string>
 # Offset the rule evaluation timestamp of this particular group by the specified duration into the past.
 [ query_offset: <duration> | default = global.rule_query_offset ]
 
+# Labels to add or overwrite before storing the result for its rules.
+# Labels defined in <rule> will override the key if it has a collision.
+labels:
+  [ <labelname>: <labelvalue> ]
+
 rules:
   [ - <rule> ... ]
 
diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index ba140c992a..4be11ed472 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -11,15 +11,6 @@ Their behaviour can change in future releases which will be communicated via the
 You can enable them using the `--enable-feature` flag with a comma separated list of features.
 They may be enabled by default in future versions.
 
-## Expand environment variables in external labels
-
-`--enable-feature=expand-external-labels`
-
-Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file)
-values according to the values of the current environment variables. References
-to undefined variables are replaced by the empty string.
-The `$` character can be escaped by using `$$`.
-
 ## Exemplars storage
 
 `--enable-feature=exemplar-storage`
 
@@ -32,9 +23,8 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem
 
 `--enable-feature=memory-snapshot-on-shutdown`
 
-This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores
-it on disk. This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped
-chunks without the need of WAL replay.
+This takes a snapshot of the chunks that are in memory along with the series information when shutting down and stores it on disk. This will reduce the startup time since the memory state can now be restored with this snapshot
+and m-mapped chunks, while a WAL replay from disk is only needed for the parts of the WAL that are not part of the snapshot.
 
 ## Extra scrape metrics
 
@@ -47,16 +37,6 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow
   to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`.
 - `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`.
 
-## Prometheus agent
-
-`--enable-feature=agent`
-
-When enabled, Prometheus runs in agent mode. The agent mode is limited to
-discovery, scrape and remote write.
-
-This is useful when you do not need to query the Prometheus data locally, but
-only from a central [remote endpoint](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
- ## Per-step stats `--enable-feature=promql-per-step-stats` @@ -81,15 +61,6 @@ When enabled, the GOMEMLIMIT variable is automatically set to match the Linux co There is also an additional tuning flag, `--auto-gomemlimit.ratio`, which allows controlling how much of the memory is used for Prometheus. The remainder is reserved for memory outside the process. For example, kernel page cache. Page cache is important for Prometheus TSDB query performance. The default is `0.9`, which means 90% of the memory limit will be used for Prometheus. -## No default scrape port - -`--enable-feature=no-default-scrape-port` - -When enabled, the default ports for HTTP (`:80`) or HTTPS (`:443`) will _not_ be added to -the address used to scrape a target (the value of the `__address_` label), contrary to the default behavior. -In addition, if a default HTTP or HTTPS port has already been added either in a static configuration or -by a service discovery mechanism and the respective scheme is specified (`http` or `https`), that port will be removed. - ## Native Histograms `--enable-feature=native-histograms` @@ -112,67 +83,7 @@ those classic histograms that do not come with a corresponding native histogram. However, if a native histogram is present, Prometheus will ignore the corresponding classic histogram, with the notable exception of exemplars, which are always ingested. To keep the classic histograms as well, enable -`scrape_classic_histograms` in the scrape job. - -_Note about the format of `le` and `quantile` label values:_ - -In certain situations, the protobuf parsing changes the number formatting of -the `le` labels of classic histograms and the `quantile` labels of -summaries. Typically, this happens if the scraped target is instrumented with -[client_golang](https://github.com/prometheus/client_golang) provided that -[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts) -is set to `false`. In such a case, integer label values are represented in the -text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing -changes the representation to float-like (following the OpenMetrics -specification), so the examples above become `quantile="1.0"` and `le="2.0"` after -ingestion into Prometheus, which changes the identity of the metric compared to -what was ingested before via the text format. - -The effect of this change is that alerts, recording rules and dashboards that -directly reference label values as whole numbers such as `le="1"` will stop -working. - -Aggregation by the `le` and `quantile` labels for vectors that contain the old and -new formatting will lead to unexpected results, and range vectors that span the -transition between the different formatting will contain additional series. -The most common use case for both is the quantile calculation via -`histogram_quantile`, e.g. -`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`. -The `histogram_quantile` function already tries to mitigate the effects to some -extent, but there will be inaccuracies, in particular for shorter ranges that -cover only a few samples. - -Ways to deal with this change either globally or on a per metric basis: - -- Fix references to integer `le`, `quantile` label values, but otherwise do -nothing and accept that some queries that span the transition time will produce -inaccurate or unexpected results. 
-_This is the recommended solution, to get consistently normalized label values._
-Also Prometheus 3.0 is expected to enforce normalization of these label values.
-- Use `metric_relabel_config` to retain the old labels when scraping targets.
-This should **only** be applied to metrics that currently produce such labels.
-
-```yaml
-  metric_relabel_configs:
-  - source_labels:
-    - quantile
-    target_label: quantile
-    regex: (\d+)\.0+
-  - source_labels:
-    - le
-    - __name__
-    target_label: le
-    regex: (\d+)\.0+;.*_bucket
-```
-
-## OTLP Receiver
-
-`--enable-feature=otlp-write-receiver`
-
-The OTLP receiver allows Prometheus to accept [OpenTelemetry](https://opentelemetry.io/) metrics writes.
-Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features
-won't work when you push OTLP metrics.
+`always_scrape_classic_histograms` in the scrape job.
 
 ## Experimental PromQL functions
 
diff --git a/docs/migration.md b/docs/migration.md
index cb88bbfd6f..43fc43df2a 100644
--- a/docs/migration.md
+++ b/docs/migration.md
@@ -3,198 +3,198 @@ title: Migration
 sort_rank: 10
 ---
 
-# Prometheus 2.0 migration guide
+# Prometheus 3.0 migration guide
 
-In line with our [stability promise](https://prometheus.io/blog/2016/07/18/prometheus-1-0-released/#fine-print),
-the Prometheus 2.0 release contains a number of backwards incompatible changes.
-This document offers guidance on migrating from Prometheus 1.8 to Prometheus 2.0 and newer versions.
+In line with our [stability promise](https://prometheus.io/docs/prometheus/latest/stability/),
+the Prometheus 3.0 release contains a number of backwards incompatible changes.
+This document offers guidance on migrating from Prometheus 2.x to Prometheus 3.0 and newer versions.
 
 ## Flags
 
-The format of Prometheus command line flags has changed. Instead of a
-single dash, all flags now use a double dash. Common flags (`--config.file`,
-`--web.listen-address` and `--web.external-url`) remain but
-almost all storage-related flags have been removed.
+- The following feature flags have been removed; their behavior is now part of the
+  default behavior of Prometheus v3:
+  - `promql-at-modifier`
+  - `promql-negative-offset`
+  - `remote-write-receiver`
+  - `new-service-discovery-manager`
+  - `expand-external-labels`
+    Environment variable references `${var}` or `$var` in external label values
+    are replaced according to the values of the current environment variables.
+    References to undefined variables are replaced by the empty string.
+    The `$` character can be escaped by using `$$`.
+  - `no-default-scrape-port`
+    Prometheus v3 will no longer add ports to scrape targets according to the
+    specified scheme. Targets will now appear in labels as configured.
+    If you rely on scrape targets like
+    `https://example.com/metrics` or `http://example.com/metrics` to be
+    represented as `https://example.com/metrics:443` and
+    `http://example.com/metrics:80` respectively, add the ports to your target URLs.
+  - `agent`
+    Instead use the dedicated `--agent` CLI flag.
-Some notable flags which have been removed:
+  Prometheus v3 will log a warning if you continue to pass these to
-- `-alertmanager.url` In Prometheus 2.0, the command line flags for configuring
+  `--enable-feature`.
-  a static Alertmanager URL have been removed. Alertmanager must now be
-  discovered via service discovery, see [Alertmanager service discovery](#alertmanager-service-discovery).
+## Configuration
-- `-log.format` In Prometheus 2.0 logs can only be streamed to standard error.
- -- `-query.staleness-delta` has been renamed to `--query.lookback-delta`; Prometheus - 2.0 introduces a new mechanism for handling staleness, see [staleness](querying/basics.md#staleness). - -- `-storage.local.*` Prometheus 2.0 introduces a new storage engine; as such all - flags relating to the old engine have been removed. For information on the - new engine, see [Storage](#storage). - -- `-storage.remote.*` Prometheus 2.0 has removed the deprecated remote - storage flags, and will fail to start if they are supplied. To write to - InfluxDB, Graphite, or OpenTSDB use the relevant storage adapter. - -## Alertmanager service discovery - -Alertmanager service discovery was introduced in Prometheus 1.4, allowing Prometheus -to dynamically discover Alertmanager replicas using the same mechanism as scrape -targets. In Prometheus 2.0, the command line flags for static Alertmanager config -have been removed, so the following command line flag: - -``` -./prometheus -alertmanager.url=http://alertmanager:9093/ -``` - -Would be replaced with the following in the `prometheus.yml` config file: - -```yaml -alerting: - alertmanagers: - - static_configs: - - targets: - - alertmanager:9093 -``` - -You can also use all the usual Prometheus service discovery integrations and -relabeling in your Alertmanager configuration. This snippet instructs -Prometheus to search for Kubernetes pods, in the `default` namespace, with the -label `name: alertmanager` and with a non-empty port. - -```yaml -alerting: - alertmanagers: - - kubernetes_sd_configs: - - role: pod - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_name] - regex: alertmanager - action: keep - - source_labels: [__meta_kubernetes_namespace] - regex: default - action: keep - - source_labels: [__meta_kubernetes_pod_container_port_number] - regex: - action: drop -``` - -## Recording rules and alerts - -The format for configuring alerting and recording rules has been changed to YAML. -An example of a recording rule and alert in the old format: - -``` -job:request_duration_seconds:histogram_quantile99 = - histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - -ALERT FrontendRequestLatency - IF job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - FOR 5m - ANNOTATIONS { - summary = "High frontend request latency", - } -``` - -Would look like this: - -```yaml -groups: -- name: example.rules - rules: - - record: job:request_duration_seconds:histogram_quantile99 - expr: histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - - alert: FrontendRequestLatency - expr: job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - for: 5m - annotations: - summary: High frontend request latency -``` - -To help with the change, the `promtool` tool has a mode to automate the rules conversion. Given a `.rules` file, it will output a `.rules.yml` file in the -new format. For example: - -``` -$ promtool update rules example.rules -``` - -You will need to use `promtool` from [Prometheus 2.5](https://github.com/prometheus/prometheus/releases/tag/v2.5.0) as later versions no longer contain the above subcommand. - -## Storage - -The data format in Prometheus 2.0 has completely changed and is not backwards -compatible with 1.8 and older versions. 
-To retain access to your historic monitoring data we
-recommend you run a non-scraping Prometheus instance running at least version
-1.8.1 in parallel with your Prometheus 2.0 instance, and have the new server
-read existing data from the old one via the remote read protocol.
-
-Your Prometheus 1.8 instance should be started with the following flags and an
-config file containing only the `external_labels` setting (if any):
-
-```
-$ ./prometheus-1.8.1.linux-amd64/prometheus -web.listen-address ":9094" -config.file old.yml
-```
-
-Prometheus 2.0 can then be started (on the same machine) with the following flags:
-
-```
-$ ./prometheus-2.0.0.linux-amd64/prometheus --config.file prometheus.yml
-```
-
-Where `prometheus.yml` contains in addition to your full existing configuration, the stanza:
-
-```yaml
-remote_read:
-  - url: "http://localhost:9094/api/v1/read"
-```
+- The scrape job level configuration option `scrape_classic_histograms` has been
+  renamed to `always_scrape_classic_histograms`. If you use the
+  `--enable-feature=native-histograms` feature flag to ingest native histograms
+  and you also want to ingest classic histograms that an endpoint might expose
+  along with native histograms, be sure to add this configuration or change your
+  configuration from the old name.
+- The default for `http_config.enable_http2` in `remote_write` items has been
+  changed to `false`. In Prometheus v2 the remote write HTTP client would
+  default to HTTP/2. In order to parallelize multiple remote write queues
+  across multiple sockets, it is preferable to not default to HTTP/2.
+  If you prefer to use HTTP/2 for remote write, you must now set
+  `http_config.enable_http2: true` in your `remote_write` configuration section.
 
 ## PromQL
 
-The following features have been removed from PromQL:
+- The `.` pattern in regular expressions in PromQL matches newline characters.
+  With this change, a regular expression like `.*` matches strings that include
+  `\n`. This applies to matchers in queries and relabel configs. For example, the
+  following regular expressions now match the accompanying strings, whereas in
+  Prometheus v2 these combinations didn't match.
-- `drop_common_labels` function - the `without` aggregation modifier should be used
-  instead.
-- `keep_common` aggregation modifier - the `by` modifier should be used instead.
-- `count_scalar` function - use cases are better handled by `absent()` or correct
-  propagation of labels in operations.
+
+| Regex | Additional matches |
+| ----- | ------ |
+| ".*" | "foo\n", "Foo\nBar" |
+| "foo.?bar" | "foo\nbar" |
+| "foo.+bar" | "foo\nbar" |
+
-See [issue #3060](https://github.com/prometheus/prometheus/issues/3060) for more
-details.
+  If you want Prometheus v3 to behave like v2 did, you will have to change your
+  regular expressions by replacing all `.` patterns with `[^\n]`, e.g.
+  `foo[^\n]*`.
+- Lookback and range selectors are left-open and right-closed (previously
+  left-closed and right-closed). This change affects queries when the evaluation
+  time perfectly aligns with the sample timestamps. For example, assume querying
+  a time series with evenly spaced samples exactly 1 minute apart. Before
+  Prometheus 3.x, a range query with `5m` would mostly return 5 samples. But if
+  the query evaluation aligned perfectly with a scrape, it would return 6
+  samples. In Prometheus 3.x, queries like this will always return 5 samples.
+  This change will likely have little effect on everyday use, except for some
+  sub-query use cases.
+  Query front-ends that align queries usually align sub-queries to multiples of
+  the step size. These sub-queries will likely be affected.
+  Tests are also likely to be affected. To fix them, either adjust the expected
+  number of samples or extend the range by less than one sample interval.
+- The `holt_winters` function has been renamed to `double_exponential_smoothing`
+  and is now guarded by the `promql-experimental-functions` feature flag.
+  If you want to keep using `holt_winters`, you have to do both of these things:
+  - Rename `holt_winters` to `double_exponential_smoothing` in your queries.
+  - Pass `--enable-feature=promql-experimental-functions` in your Prometheus
+    CLI invocation.
+
+## Scrape protocols
+
+Prometheus v3 is stricter concerning the Content-Type header received when
+scraping. Prometheus v2 would default to the standard Prometheus text protocol
+if the target being scraped did not specify a Content-Type header or if the
+header was unparsable or unrecognised. This could lead to incorrect data being
+parsed in the scrape. Prometheus v3 will now fail the scrape in such cases.
+
+If a scrape target does not provide the correct Content-Type header, the
+fallback protocol can be specified using the `fallback_scrape_protocol`
+parameter. See the [Prometheus scrape_config documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
+
+This is a breaking change, as scrapes that may have succeeded with Prometheus v2
+may now fail if this fallback protocol is not specified.
 
 ## Miscellaneous
 
-### Prometheus non-root user
+### TSDB format and downgrade
+
+The TSDB format has been changed in Prometheus v2.55 in preparation for changes
+to the index format. Consequently, a Prometheus v3 TSDB can only be read by
+Prometheus v2.55 or newer.
+Before upgrading to Prometheus v3, please upgrade to v2.55 first and confirm
+Prometheus works as expected. Only then continue with the upgrade to v3.
 
-The Prometheus Docker image is now built to [run Prometheus
-as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you
-want the Prometheus UI/API to listen on a low port number (say, port 80), you'll
-need to override it. For Kubernetes, you would use the following YAML:
+### TSDB storage contract
+
+TSDB-compatible storage is now expected to return results matching the specified
+selectors. This might impact some third-party implementations, most likely
+those implementing `remote_read`.
+This contract is not explicitly enforced, but can cause undefined behavior.
+
+### UTF-8 names
+
+Prometheus v3 supports UTF-8 in metric and label names. This means metric and
+label names can change after upgrading according to what is exposed by
+endpoints. Furthermore, metric and label names that would previously have been
+flagged as invalid no longer will be.
+
+Users wishing to preserve the original validation behavior can update their
+Prometheus YAML configuration to specify the legacy validation scheme:
+
+```
+global:
+  metric_name_validation_scheme: legacy
+```
+
+Or on a per-scrape basis:
+
+```
+scrape_configs:
+  - job_name: job1
+    metric_name_validation_scheme: utf8
+  - job_name: job2
+    metric_name_validation_scheme: legacy
+```
+
+### Log message format
+
+Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This
+results in a change of log message format.
+
+## Scrape protocols
+Prometheus v3 is stricter concerning the Content-Type header received when
+scraping. Prometheus v2 would default to the standard Prometheus text protocol
+if the target being scraped did not specify a Content-Type header or if the
+header was unparsable or unrecognised. This could lead to incorrect data being
+parsed in the scrape. Prometheus v3 will now fail the scrape in such cases.
+
+If a scrape target is not providing the correct Content-Type header, the
+fallback protocol can be specified using the `fallback_scrape_protocol`
+parameter. See the [Prometheus scrape_config documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
+
+This is a breaking change, as scrapes that may have succeeded with Prometheus v2
+may now fail if this fallback protocol is not specified.
 
 ## Miscellaneous
 
-### Prometheus non-root user
+### TSDB format and downgrade
+The TSDB format has been changed in Prometheus v2.55 in preparation for changes
+to the index format. Consequently, a Prometheus v3 TSDB can only be read by
+Prometheus v2.55 or newer.
+Before upgrading to Prometheus v3, please upgrade to v2.55 first and confirm
+Prometheus works as expected. Only then continue with the upgrade to v3.
 
-The Prometheus Docker image is now built to [run Prometheus
-as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you
-want the Prometheus UI/API to listen on a low port number (say, port 80), you'll
-need to override it. For Kubernetes, you would use the following YAML:
+### TSDB storage contract
+TSDB-compatible storage is now expected to return results matching the
+specified selectors. This might impact some third-party implementations, most
+likely those implementing `remote_read`.
+This contract is not explicitly enforced, but violating it can cause undefined
+behavior.
+
+### UTF-8 names
+Prometheus v3 supports UTF-8 in metric and label names. This means metric and
+label names can change after upgrading according to what is exposed by
+endpoints. Furthermore, metric and label names that would have previously been
+flagged as invalid no longer will be.
+
+Users wishing to preserve the original validation behavior can update their
+Prometheus YAML configuration to specify the legacy validation scheme:
+
+```yaml
+global:
+  metric_name_validation_scheme: legacy
+```
+
+Or on a per-scrape basis:
+
+```yaml
+scrape_configs:
+  - job_name: job1
+    metric_name_validation_scheme: utf8
+  - job_name: job2
+    metric_name_validation_scheme: legacy
+```
+
+### Log message format
+Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This
+results in a change of log message format. An example of the old log format is:
+```
+ts=2024-10-23T22:01:06.074Z caller=main.go:627 level=info msg="No time or size retention was set so using the default time retention" duration=15d
+ts=2024-10-23T22:01:06.074Z caller=main.go:671 level=info msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=91d80252c3e528728b0f88d254dd720f6be07cb8-modified)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:676 level=info build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:677 level=info host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))"
+```
+
+A similar sequence in the new log format looks like this:
+```
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:640 msg="No time or size retention was set so using the default time retention" duration=15d
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:681 msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=7c7116fea8343795cae6da42960cacd0207a2af8)"
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:686 msg="operational information" build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" fd_limits="(soft=1048576, hard=1048576)" vm_limits="(soft=unlimited, hard=unlimited)"
+```
+
+### `le` and `quantile` label values
+In Prometheus v3, the values of the `le` label of classic histograms and the
+`quantile` label of summaries are normalized upon ingestion. In Prometheus v2,
+the values of these labels depended on the scrape protocol (protobuf vs text
+format) in some situations. This led to label values changing based on the
+scrape protocol. E.g. a metric exposed as `my_classic_hist{le="1"}` would be
+ingested as `my_classic_hist{le="1"}` via the text format, but as
+`my_classic_hist{le="1.0"}` via protobuf. This changed the identity of the
+metric and caused problems when querying the metric.
+In Prometheus v3, these label values will always be normalized to a float-like
+representation. I.e. the above example will always result in
+`my_classic_hist{le="1.0"}` being ingested into Prometheus, no matter via which
+protocol. The effect of this change is that alerts, recording rules and
+dashboards that directly reference label values as whole numbers such as
+`le="1"` will stop working.
+
+Ways to deal with this change either globally or on a per-metric basis:
+
+- Fix references to integer `le`, `quantile` label values, but otherwise do
+nothing and accept that some queries that span the transition time will produce
+inaccurate or unexpected results.
+_This is the recommended solution._
+- Use `metric_relabel_configs` to retain the old labels when scraping targets.
+This should **only** be applied to metrics that currently produce such labels.
 
 ```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: security-context-demo-2
-spec:
-  securityContext:
-    runAsUser: 0
-...
+  metric_relabel_configs:
+  - source_labels:
+    - quantile
+    target_label: quantile
+    regex: (\d+)\.0+
+  - source_labels:
+    - le
+    - __name__
+    target_label: le
+    regex: (\d+)\.0+;.*_bucket
 ```
 
-See [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/)
-for more details.
+# Prometheus 2.0 migration guide
 
-If you're using Docker, then the following snippet would be used:
-
-```
-docker run -p 9090:9090 prom/prometheus:latest
-```
-
-### Prometheus lifecycle
-
-If you use the Prometheus `/-/reload` HTTP endpoint to [automatically reload your
-Prometheus config when it changes](configuration/configuration.md),
-these endpoints are disabled by default for security reasons in Prometheus 2.0.
-To enable them, set the `--web.enable-lifecycle` flag.
+For the Prometheus 1.8 to 2.0 migration, please refer to the [Prometheus v2.55 documentation](https://prometheus.io/docs/prometheus/2.55/migration/).
diff --git a/docs/querying/api.md b/docs/querying/api.md
index e32c8ecaf5..0352496f18 100644
--- a/docs/querying/api.md
+++ b/docs/querying/api.md
@@ -59,7 +59,7 @@ timestamps are always represented as Unix timestamps in seconds.
 * `<series_selector>`: Prometheus [time series selectors](basics.md#time-series-selectors)
   like `http_requests_total` or `http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded.
-* `<duration>`: [Prometheus duration strings](basics.md#time-durations).
+* `<duration>`: [the subset of Prometheus float literals using time units](basics.md#float-literals-and-time-durations).
   For example, `5m` refers to a duration of 5 minutes.
 * `<bool>`: boolean values (strings `true` and `false`).
@@ -568,7 +568,7 @@ Instant vectors are returned as result type `vector`. The corresponding
 Each series could have the `"value"` key, or the `"histogram"` key, but not both.
 
 Series are not guaranteed to be returned in any particular order unless a function
-such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)`
+such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)
 is used.
@@ -764,6 +764,8 @@ URL query parameters:
 - `file[]=<filepath>`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done.
 - `exclude_alerts=<bool>`: only return rules, do not return active alerts.
 - `match[]=<label_selector>`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional.
+- `group_limit=<number>`: The `group_limit` parameter allows you to specify a limit on the number of rule groups returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process.
+- `group_next_token`: the pagination token that was returned in the previous request when the `group_limit` property is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also needs to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned.
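+
+For example, a hypothetical pagination flow (the token value is made up):
+
+```
+$ curl 'http://localhost:9090/api/v1/rules?group_limit=10'
+# ...the response carries "groupNextToken":"abc123" while more groups remain...
+$ curl 'http://localhost:9090/api/v1/rules?group_limit=10&group_next_token=abc123'
+```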
 
 ```json
 $ curl http://localhost:9090/api/v1/rules
@@ -903,7 +905,7 @@ curl -G http://localhost:9091/api/v1/targets/metadata \
 ```
 
 The following example returns metadata for all metrics for all targets with
-label `instance="127.0.0.1:9090`.
+label `instance="127.0.0.1:9090"`.
 
 ```json
 curl -G http://localhost:9091/api/v1/targets/metadata \
@@ -1188,9 +1190,11 @@ The following endpoint returns various cardinality statistics about the Promethe
 GET /api/v1/status/tsdb
 ```
 URL query parameters:
+
 - `limit=<number>`: Limit the number of returned items to a given number for each set of statistics. By default, 10 items are returned.
 
-The `data` section of the query result consists of
+The `data` section of the query result consists of:
+
 - **headStats**: This provides the following data about the head block of the TSDB:
   - **numSeries**: The number of series.
   - **chunkCount**: The number of chunks.
@@ -1266,13 +1270,13 @@ The following endpoint returns information about the WAL replay:
 
 ```
 GET /api/v1/status/walreplay
 ```
 
-**read**: The number of segments replayed so far.
-**total**: The total number segments needed to be replayed.
-**progress**: The progress of the replay (0 - 100%).
-**state**: The state of the replay. Possible states:
-- **waiting**: Waiting for the replay to start.
-- **in progress**: The replay is in progress.
-- **done**: The replay has finished.
+- **read**: The number of segments replayed so far.
+- **total**: The total number of segments that need to be replayed.
+- **progress**: The progress of the replay (0 - 100%).
+- **state**: The state of the replay. Possible states:
+  - **waiting**: Waiting for the replay to start.
+  - **in progress**: The replay is in progress.
+  - **done**: The replay has finished.
 
 ```json
 $ curl http://localhost:9090/api/v1/status/walreplay
@@ -1388,8 +1392,74 @@ is not considered an efficient way of ingesting samples.
 Use it with caution for specific low-volume use cases.
 It is not suitable for replacing the ingestion via scraping.
 
-Enable the OTLP receiver by the feature flag
-`--enable-feature=otlp-write-receiver`. When enabled, the OTLP receiver
+Enable the OTLP receiver by setting
+`--web.enable-otlp-receiver`. When enabled, the OTLP receiver
 endpoint is `/api/v1/otlp/v1/metrics`.
 
 *New in v2.47*
+
+## Notifications
+
+The following endpoints provide information about active status notifications concerning the Prometheus server itself.
+Notifications are used in the web UI.
+
+These endpoints are **experimental**. They may change in the future.
+
+### Active Notifications
+
+The `/api/v1/notifications` endpoint returns a list of all currently active notifications.
+
+```
+GET /api/v1/notifications
+```
+
+Example:
+
+```
+$ curl http://localhost:9090/api/v1/notifications
+{
+  "status": "success",
+  "data": [
+    {
+      "text": "Prometheus is shutting down and gracefully stopping all operations.",
+      "date": "2024-10-07T12:33:08.551376578+02:00",
+      "active": true
+    }
+  ]
+}
+```
+
+*New in v3.0*
+
+### Live Notifications
+
+The `/api/v1/notifications/live` endpoint streams live notifications as they occur, using [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events). Deleted notifications are sent with `active: false`. Active notifications will be sent when connecting to the endpoint.
+
+```
+GET /api/v1/notifications/live
+```
+
+Example:
+
+```
+$ curl http://localhost:9090/api/v1/notifications/live
+data: {
+  "status": "success",
+  "data": [
+    {
+      "text": "Prometheus is shutting down and gracefully stopping all operations.",
+      "date": "2024-10-07T12:33:08.551376578+02:00",
+      "active": true
+    }
+  ]
+}
+```
+
+**Note:** The `/notifications/live` endpoint will return a `204 No Content` response if the maximum number of subscribers has been reached. You can set the maximum number of subscribers with the flag `--web.max-notifications-subscribers`, which defaults to 16.
+
+```
+GET /api/v1/notifications/live
+204 No Content
+```
+
+*New in v3.0*
diff --git a/docs/querying/basics.md b/docs/querying/basics.md
index 4ea186beeb..1c06afb85d 100644
--- a/docs/querying/basics.md
+++ b/docs/querying/basics.md
@@ -35,8 +35,9 @@ evaluate to one of four types:
 
 Depending on the use-case (e.g. when graphing vs. displaying the output of an
 expression), only some of these types are legal as the result of a
-user-specified expression. For example, an expression that returns an instant
-vector is the only type which can be graphed.
+user-specified expression.
+For [instant queries](api.md#instant-queries), any of the above data types are allowed as the root of the expression.
+[Range queries](api.md#range-queries) only support scalar-typed and instant-vector-typed expressions.
 
 _Notes about the experimental native histograms:_
 
@@ -68,9 +69,10 @@ Example:
 
     'these are unescaped: \n \\ \t'
     `these are not unescaped: \n ' " \t`
 
-### Float literals
+### Float literals and time durations
 
-Scalar float values can be written as literal integer or floating-point numbers in the format (whitespace only included for better readability):
+Scalar float values can be written as literal integer or floating-point numbers
+in the format (whitespace only included for better readability):
 
     [-+]?(
           [0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?
@@ -87,16 +89,53 @@ Examples:
     0x8f
     -Inf
     NaN
-
-As of version 2.54, float literals can also be represented using the syntax of time durations, where the time duration is converted into a float value corresponding to the number of seconds the time duration represents. This is an experimental feature and might still change.
+Additionally, underscores (`_`) can be used between decimal or hexadecimal
+digits to improve readability.
 
 Examples:
 
-    1s # Equivalent to 1.0
-    2m # Equivalent to 120.0
-    1ms # Equivalent to 0.001
-
+    1_000_000
+    .123_456_789
+    0x_53_AB_F3_82
+
+Float literals are also used to specify durations in seconds. For convenience,
+decimal integer numbers may be combined with the following
+time units:
+
+* `ms` – milliseconds
+* `s` – seconds – 1s equals 1000ms
+* `m` – minutes – 1m equals 60s (ignoring leap seconds)
+* `h` – hours – 1h equals 60m
+* `d` – days – 1d equals 24h (ignoring so-called daylight saving time)
+* `w` – weeks – 1w equals 7d
+* `y` – years – 1y equals 365d (ignoring leap days)
+
+Suffixing a decimal integer number with one of the units above is an
+alternative notation for the equivalent number of seconds as a bare float
+literal.
+
+Examples:
+
+    1s # Equivalent to 1.
+    2m # Equivalent to 120.
+    1ms # Equivalent to 0.001.
+    -2h # Equivalent to -7200.
+
+The following examples do _not_ work:
+
+    0xABm # No suffixing of hexadecimal numbers.
+    1.5h # Time units cannot be combined with a floating point.
+    +Infd # No suffixing of ±Inf or NaN.
+
+Multiple units can be combined by concatenation of suffixed integers. Units
+must be ordered from the longest to the shortest. A given unit must only appear
+once per float literal.
+
+Examples:
+
+    1h30m # Equivalent to 5400s and thus 5400.
+    12h34m56s # Equivalent to 45296s and thus 45296.
+    54s321ms # Equivalent to 54.321.
 
 ## Time series selectors
 
@@ -109,8 +148,16 @@ single sample value for each at a given timestamp (point in time). In the simpl
 form, only a metric name is specified, which results in an instant vector
 containing elements for all time series that have this metric name.
 
+The value returned will be that of the most recent sample at or before the
+query's evaluation timestamp (in the case of an
+[instant query](api.md#instant-queries))
+or the current step within the query (in the case of a
+[range query](api.md#range-queries)).
+The [`@` modifier](#modifier) allows overriding the timestamp relative to which
+the selection takes place. Time series are only returned if their most recent sample is less than the [lookback period](#staleness) ago.
+
 This example selects all time series that have the `http_requests_total` metric
-name:
+name, returning the most recent sample for each:
 
     http_requests_total
 
@@ -200,53 +247,22 @@ syntax](https://github.com/google/re2/wiki/Syntax).
 
 ### Range Vector Selectors
 
 Range vector literals work like instant vector literals, except that they
-select a range of samples back from the current instant. Syntactically, a [time
-duration](#time-durations) is appended in square brackets (`[]`) at the end of
-a vector selector to specify how far back in time values should be fetched for
-each resulting range vector element. The range is a left-open and right-closed interval,
-i.e. samples with timestamps coinciding with the left boundary of the range are excluded from the selection,
-while samples coinciding with the right boundary of the range are included in the selection.
+select a range of samples back from the current instant. Syntactically, a
+[float literal](#float-literals-and-time-durations) is appended in square
+brackets (`[]`) at the end of a vector selector to specify for how many seconds
+back in time values should be fetched for each resulting range vector element.
+Commonly, the float literal uses the syntax with one or more time units, e.g.
+`[5m]`. The range is a left-open and right-closed interval, i.e. samples with
+timestamps coinciding with the left boundary of the range are excluded from the
+selection, while samples coinciding with the right boundary of the range are
+included in the selection.
-In this example, we select all the values recorded less than 5m ago for all time series
-that have the metric name `http_requests_total` and
-a `job` label set to `prometheus`:
+In this example, we select all the values recorded less than 5m ago for all
+time series that have the metric name `http_requests_total` and a `job` label
+set to `prometheus`:
 
     http_requests_total{job="prometheus"}[5m]
 
-### Time Durations
-
-Time durations are specified as a number, followed immediately by one of the
-following units:
-
-* `ms` - milliseconds
-* `s` - seconds
-* `m` - minutes
-* `h` - hours
-* `d` - days - assuming a day always has 24h
-* `w` - weeks - assuming a week always has 7d
-* `y` - years - assuming a year always has 365d1
-
-1 For days in a year, the leap day is ignored, and conversely, for a minute, a leap second is ignored.
-
-Time durations can be combined by concatenation. Units must be ordered from the
-longest to the shortest. A given unit must only appear once in a time duration.
-
-Here are some examples of valid time durations:
-
-    5h
-    1h30m
-    5m
-    10s
-
-
-As of version 2.54, time durations can also be represented using the syntax of float literals, implying the number of seconds of the time duration. This is an experimental feature and might still change.
-
-Examples:
-
-    1.0 # Equivalent to 1s
-    0.001 # Equivalent to 1ms
-    120 # Equivalent to 2m
-
 ### Offset modifier
 
 The `offset` modifier allows changing the time offset for individual
@@ -329,7 +345,7 @@ Note that the `@` modifier allows a query to look ahead of its evaluation time.
 
 Subquery allows you to run an instant query for a given range and resolution.
 The result of a subquery is a range vector.
 
-Syntax: ` '[' ':' [] ']' [ @ ] [ offset ]`
+Syntax: ` '[' ':' [] ']' [ @ ] [ offset ]`
 
 * `<resolution>` is optional. Default is the global evaluation interval.
@@ -359,7 +375,8 @@ cases like aggregation (`sum`, `avg`, and so on), where multiple aggregated time
 series do not precisely align in time. Because of their independence,
 Prometheus needs to assign a value at those timestamps for each relevant time
 series. It does so by taking the newest sample that is less than the lookback period ago.
-The lookback period is 5 minutes by default.
+The lookback period is 5 minutes by default, but can be
+[set with the `--query.lookback-delta` flag](../command-line/prometheus.md).
 
 If a target scrape or rule evaluation no longer returns a sample for a time
 series that was previously present, this time series will be marked as stale.
diff --git a/docs/querying/functions.md b/docs/querying/functions.md
index e13628c5c5..310b7b9337 100644
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -326,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` c
 
     histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m])))
 
-The `histogram_quantile()` function interpolates quantile values by
-assuming a linear distribution within a bucket.
+In the (common) case that a quantile value does not coincide with a bucket
+boundary, the `histogram_quantile()` function interpolates the quantile value
+within the bucket the quantile value falls into. For classic histograms, for
+native histograms with custom bucket boundaries, and for the zero bucket of
+other native histograms, it assumes a uniform distribution of observations
+within the bucket (also called _linear interpolation_). For the
For the +non-zero-buckets of native histograms with a standard exponential bucketing +schema, the interpolation is done under the assumption that the samples within +the bucket are distributed in a way that they would uniformly populate the +buckets in a hypothetical histogram with higher resolution. (This is also +called _exponential interpolation_.) If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned. -The following is only relevant for classic histograms: If `b` contains -fewer than two buckets, `NaN` is returned. The highest bucket must have an -upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located -in the highest bucket, the upper bound of the second highest bucket is -returned. A lower limit of the lowest bucket is assumed to be 0 if the upper -bound of that bucket is greater than -0. In that case, the usual linear interpolation is applied within that -bucket. Otherwise, the upper bound of the lowest bucket is returned for -quantiles located in the lowest bucket. +Special cases for classic histograms: -You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in -a histogram. +* If `b` contains fewer than two buckets, `NaN` is returned. +* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is + returned.) +* If a quantile is located in the highest bucket, the upper bound of the second + highest bucket is returned. +* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of + that bucket is greater than 0. In that case, the usual linear interpolation + is applied within that bucket. Otherwise, the upper bound of the lowest + bucket is returned for quantiles located in the lowest bucket. -You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in -a histogram. +Special cases for native histograms (relevant for the exact interpolation +happening within the zero bucket): -Buckets of classic histograms are cumulative. Therefore, the following should always be the case: +* A zero bucket with finite width is assumed to contain no negative + observations if the histogram has observations in positive buckets, but none + in negative buckets. +* A zero bucket with finite width is assumed to contain no positive + observations if the histogram has observations in negative buckets, but none + in positive buckets. -* The counts in the buckets are monotonically increasing (strictly non-decreasing). -* A lack of observations between the upper limits of two consecutive buckets results in equal counts -in those two buckets. +You can use `histogram_quantile(0, v instant-vector)` to get the estimated +minimum value stored in a histogram. -However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets -with `sum(rate(...))`) or invalid data might violate these assumptions. In that case, -`histogram_quantile` would be unable to return meaningful results. To mitigate the issue, -`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening -because of floating point precision errors and ignores them. (The threshold to ignore a difference -between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are -non-monotonic bucket counts even after this adjustment, they are increased to the value of the -previous buckets to enforce monotonicity. 
The latter is evidence for an actual issue with the input
-data and is therefore flagged with an informational annotation reading `input to histogram_quantile
-needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove
-the source of the invalid data.
+You can use `histogram_quantile(1, v instant-vector)` to get the estimated
+maximum value stored in a histogram.
+
+Buckets of classic histograms are cumulative. Therefore, the following should
+always be the case:
+
+* The counts in the buckets are monotonically increasing (strictly
+  non-decreasing).
+* A lack of observations between the upper limits of two consecutive buckets
+  results in equal counts in those two buckets.
+
+However, floating point precision issues (e.g. small discrepancies introduced
+by computing of buckets with `sum(rate(...))`) or invalid data might violate
+these assumptions. In that case, `histogram_quantile` would be unable to return
+meaningful results. To mitigate the issue, `histogram_quantile` assumes that
+tiny relative differences between consecutive buckets are happening because of
+floating point precision errors and ignores them. (The threshold to ignore a
+difference between two buckets is a trillionth (1e-12) of the sum of both
+buckets.) Furthermore, if there are non-monotonic bucket counts even after this
+adjustment, they are increased to the value of the previous buckets to enforce
+monotonicity. The latter is evidence for an actual issue with the input data
+and is therefore flagged with an informational annotation reading `input to
+histogram_quantile needed to be fixed for monotonicity`. If you encounter this
+annotation, you should find and remove the source of the invalid data.
 
 ## `histogram_stddev()` and `histogram_stdvar()`
 
@@ -380,15 +405,22 @@ do not show up in the returned vector.
 
 Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
 variance of observations in a native histogram.
 
-## `holt_winters()`
+## `double_exponential_smoothing()`
 
-`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value
+**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
+
+`double_exponential_smoothing(v range-vector, sf scalar, tf scalar)` produces a smoothed value
 for time series based on the range in `v`. The lower the smoothing factor `sf`,
 the more importance is given to old data. The higher the trend factor `tf`, the
 more trends in the data is considered. Both `sf` and `tf` must be between 0 and
 1.
+For additional details, refer to [NIST Engineering Statistics Handbook](https://www.itl.nist.gov/div898/handbook/pmc/section4/pmc433.htm).
+In Prometheus v2 this function was called `holt_winters`. This caused confusion
+since the Holt-Winters method usually refers to triple exponential smoothing.
+Double exponential smoothing as implemented here is also referred to as "Holt
+Linear".
 
-`holt_winters` should only be used with gauges.
+`double_exponential_smoothing` should only be used with gauges.
 
 ## `hour()`
 
@@ -432,6 +464,97 @@ by the number of seconds under the specified time range window,
 and should be used primarily for human readability. Use `rate` in recording
 rules so that increases are tracked consistently on a per-second basis.
 
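+For example, a minimal recording rule following that advice (the rule and
+metric names are illustrative):
+
+```yaml
+groups:
+  - name: example
+    rules:
+      # Record the per-second rate; scale it at query time (e.g. * 3600
+      # for an hourly increase) instead of recording increase() directly.
+      - record: job:http_requests:rate5m
+        expr: sum by (job) (rate(http_requests_total[5m]))
+```
+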
+## `info()` (experimental) + +_The `info` function is an experiment to improve UX +around including labels from [info metrics](https://grafana.com/blog/2021/08/04/how-to-use-promql-joins-for-more-effective-queries-of-prometheus-metrics-at-scale/#info-metrics). +The behavior of this function may change in future versions of Prometheus, +including its removal from PromQL. `info` has to be enabled via the +[feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`._ + +`info(v instant-vector, [data-label-selector instant-vector])` finds, for each time +series in `v`, all info series with matching _identifying_ labels (more on +this later), and adds the union of their _data_ (i.e., non-identifying) labels +to the time series. The second argument `data-label-selector` is optional. +It is not a real instant vector, but uses a subset of its syntax. +It must start and end with curly braces (`{ ... }`) and may only contain label matchers. +The label matchers are used to constrain which info series to consider +and which data labels to add to `v`. + +Identifying labels of an info series are the subset of labels that uniquely +identify the info series. The remaining labels are considered +_data labels_ (also called non-identifying). (Note that Prometheus's concept +of time series identity always includes _all_ the labels. For the sake of the `info` +function, we “logically” define info series identity in a different way than +in the conventional Prometheus view.) The identifying labels of an info series +are used to join it to regular (non-info) series, i.e. those series that have +the same labels as the identifying labels of the info series. The data labels, which are +the ones added to the regular series by the `info` function, effectively encode +metadata key value pairs. (This implies that a change in the data labels +in the conventional Prometheus view constitutes the end of one info series and +the beginning of a new info series, while the “logical” view of the `info` function is +that the same info series continues to exist, just with different “data”.) + +The conventional approach of adding data labels is sometimes called a “join query”, +as illustrated by the following example: + +``` + rate(http_server_request_duration_seconds_count[2m]) +* on (job, instance) group_left (k8s_cluster_name) + target_info +``` + +The core of the query is the expression `rate(http_server_request_duration_seconds_count[2m])`. +But to add data labels from an info metric, the user has to use elaborate +(and not very obvious) syntax to specify which info metric to use (`target_info`), what the +identifying labels are (`on (job, instance)`), and which data labels to add +(`group_left (k8s_cluster_name)`). + +This query is not only verbose and hard to write, it might also run into an “identity crisis”: +If any of the data labels of `target_info` changes, Prometheus sees that as a change of series +(as alluded to above, Prometheus just has no native concept of non-identifying labels). +If the old `target_info` series is not properly marked as stale (which can happen with certain ingestion paths), +the query above will fail for up to 5m (the lookback delta) because it will find a conflicting +match with both the old and the new version of `target_info`. + +The `info` function not only resolves this conflict in favor of the newer series, it also simplifies the syntax +because it knows about the available info series and what their identifying labels are. 
The example query +looks like this with the `info` function: + +``` +info( + rate(http_server_request_duration_seconds_count[2m]), + {k8s_cluster_name=~".+"} +) +``` + +The common case of adding _all_ data labels can be achieved by +omitting the 2nd argument of the `info` function entirely, simplifying +the example even more: + +``` +info(rate(http_server_request_duration_seconds_count[2m])) +``` + +While `info` normally automatically finds all matching info series, it's possible to +restrict them by providing a `__name__` label matcher, e.g. +`{__name__="target_info"}`. + +### Limitations + +In its current iteration, `info` defaults to considering only info series with +the name `target_info`. It also assumes that the identifying info series labels are +`instance` and `job`. `info` does support other info series names however, through +`__name__` label matchers. E.g., one can explicitly say to consider both +`target_info` and `build_info` as follows: +`{__name__=~"(target|build)_info"}`. However, the identifying labels always +have to be `instance` and `job`. + +These limitations are partially defeating the purpose of the `info` function. +At the current stage, this is an experiment to find out how useful the approach +turns out to be in practice. A final version of the `info` function will indeed +consider all matching info series and with their appropriate identifying labels. + ## `irate()` `irate(v range-vector)` calculates the per-second instant rate of increase of diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md index efbd08e984..76de112342 100644 --- a/docs/querying/remote_read_api.md +++ b/docs/querying/remote_read_api.md @@ -17,7 +17,8 @@ Request are made to the following endpoint. ### Samples -This returns a message that includes a list of raw samples. +This returns a message that includes a list of raw samples matching the +requested query. ### Streamed Chunks diff --git a/docs/stability.md b/docs/stability.md index 1fd2e51e0c..cb30b8ad99 100644 --- a/docs/stability.md +++ b/docs/stability.md @@ -9,7 +9,7 @@ Prometheus promises API stability within a major version, and strives to avoid breaking changes for key features. Some features, which are cosmetic, still under development, or depend on 3rd party services, are not covered by this. -Things considered stable for 2.x: +Things considered stable for 3.x: * The query language and data model * Alerting and recording rules @@ -18,21 +18,25 @@ Things considered stable for 2.x: * Configuration file format (minus the service discovery remote read/write, see below) * Rule/alert file format * Console template syntax and semantics -* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/). 
+* Remote write sending and receiving, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/)
+* Agent mode
+* OTLP receiver endpoint
 
-Things considered unstable for 2.x:
+Things considered unstable for 3.x:
 
 * Any feature listed as experimental or subject to change, including:
-  * The [`holt_winters` PromQL function](https://github.com/prometheus/prometheus/issues/2458)
-  * Remote write receiving, remote read and the remote read endpoint
+  * The [`double_exponential_smoothing` PromQL function](https://github.com/prometheus/prometheus/issues/2458)
+  * Remote read and the remote read endpoint
 * Server-side HTTPS and basic authentication
-* Service discovery integrations, with the exception of `static_configs` and `file_sd_configs`
+* Service discovery integrations, with the exception of `static_configs`, `file_sd_configs` and `http_sd_configs`
 * Go APIs of packages that are part of the server
 * HTML generated by the web UI
 * The metrics in the /metrics endpoint of Prometheus itself
 * Exact on-disk format. Potential changes however, will be forward compatible and transparently handled by Prometheus
 * The format of the logs
+
+Prometheus 2.x stability guarantees can be found [in the 2.x documentation](https://prometheus.io/docs/prometheus/2.55/stability/).
 
 As long as you are not using any features marked as experimental/unstable, an
 upgrade within a major version can usually be performed without any operational
 adjustments and very little risk that anything will break. Any breaking changes
diff --git a/docs/storage.md b/docs/storage.md
index d5193bf5b1..2142c970ff 100644
--- a/docs/storage.md
+++ b/docs/storage.md
@@ -144,7 +144,7 @@ a buffer, ensuring that older entries will be removed before the allocated stora
 for Prometheus becomes full. At present, we recommend setting the retention size to, at most, 80-85% of your
-allocated Prometheus disk space. This increases the likelihood that older entires
+allocated Prometheus disk space. This increases the likelihood that older entries
 will be removed prior to hitting any disk limitations.
 
 ## Remote storage integrations
diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go
index 8ccbafe6f1..128132a8d2 100644
--- a/documentation/examples/custom-sd/adapter-usage/main.go
+++ b/documentation/examples/custom-sd/adapter-usage/main.go
@@ -18,6 +18,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"log/slog"
 	"net"
 	"net/http"
 	"os"
@@ -26,10 +27,9 @@ import (
 	"time"
 
 	"github.com/alecthomas/kingpin/v2"
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
+	"github.com/prometheus/common/promslog"
 
 	prom_discovery "github.com/prometheus/prometheus/discovery"
 	"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -41,7 +41,7 @@ var (
 	a             = kingpin.New("sd adapter usage", "Tool to generate file_sd target files for unimplemented SD mechanisms.")
 	outputFile    = a.Flag("output.file", "Output file for file_sd compatible file.").Default("custom_sd.json").String()
 	listenAddress = a.Flag("listen.address", "The address the Consul HTTP API is listening on for requests.").Default("localhost:8500").String()
-	logger        log.Logger
+	logger        *slog.Logger
 
 	// addressLabel is the name for the label containing a target's address.
addressLabel = model.MetaLabelPrefix + "consul_address" @@ -90,7 +90,7 @@ type discovery struct { address string refreshInterval int tagSeparator string - logger log.Logger + logger *slog.Logger oldSourceList map[string]bool } @@ -164,7 +164,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { var srvs map[string][]string resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/services", d.address)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services list", "err", err) + d.logger.Error("Error getting services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -173,7 +173,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { io.Copy(io.Discard, resp.Body) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error reading services list", "err", err) + d.logger.Error("Error reading services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -181,7 +181,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { err = json.Unmarshal(b, &srvs) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error parsing services list", "err", err) + d.logger.Error("Error parsing services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -200,13 +200,13 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/service/%s", d.address, name)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services nodes", "service", name, "err", err) + d.logger.Error("Error getting services nodes", "service", name, "err", err) break } tg, err := d.parseServiceNodes(resp, name) if err != nil { - level.Error(d.logger).Log("msg", "Error parsing services nodes", "service", name, "err", err) + d.logger.Error("Error parsing services nodes", "service", name, "err", err) break } tgs = append(tgs, tg) @@ -254,8 +254,7 @@ func main() { fmt.Println("err: ", err) return } - logger = log.NewSyncLogger(log.NewLogfmtLogger(os.Stdout)) - logger = log.With(logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) + logger = promslog.New(&promslog.Config{}) ctx := context.Background() @@ -272,7 +271,7 @@ func main() { } if err != nil { - level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + logger.Error("failed to create discovery metrics", "err", err) os.Exit(1) } @@ -280,7 +279,7 @@ func main() { refreshMetrics := prom_discovery.NewRefreshMetrics(reg) metrics, err := prom_discovery.RegisterSDMetrics(reg, refreshMetrics) if err != nil { - level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + logger.Error("failed to register service discovery metrics", "err", err) os.Exit(1) } diff --git a/documentation/examples/custom-sd/adapter/adapter.go b/documentation/examples/custom-sd/adapter/adapter.go index dcf5a2b78c..b242c4eaa0 100644 --- a/documentation/examples/custom-sd/adapter/adapter.go +++ b/documentation/examples/custom-sd/adapter/adapter.go @@ -18,13 +18,12 @@ import ( "context" "encoding/json" "fmt" + "log/slog" "os" "path/filepath" "reflect" "sort" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -55,7 +54,7 @@ type Adapter struct { manager *discovery.Manager output string name string - logger log.Logger + 
logger *slog.Logger } func mapToArray(m map[string]*customSD) []customSD { @@ -106,7 +105,7 @@ func (a *Adapter) refreshTargetGroups(allTargetGroups map[string][]*targetgroup. a.groups = tempGroups err := a.writeOutput() if err != nil { - level.Error(log.With(a.logger, "component", "sd-adapter")).Log("err", err) + a.logger.With("component", "sd-adapter").Error("failed to write output", "err", err) } } } @@ -163,7 +162,7 @@ func (a *Adapter) Run() { } // NewAdapter creates a new instance of Adapter. -func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter { +func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger *slog.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter { return &Adapter{ ctx: ctx, disc: d, diff --git a/documentation/examples/prometheus-ovhcloud.yml b/documentation/examples/prometheus-ovhcloud.yml index 21facad1ca..b2cc60af25 100644 --- a/documentation/examples/prometheus-ovhcloud.yml +++ b/documentation/examples/prometheus-ovhcloud.yml @@ -1,4 +1,4 @@ -# An example scrape configuration for running Prometheus with Ovhcloud. +# An example scrape configuration for running Prometheus with OVHcloud. scrape_configs: - job_name: 'ovhcloud' ovhcloud_sd_configs: diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index 8ed5084d91..0aad437588 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -4,12 +4,11 @@ go 1.22.0 require ( github.com/alecthomas/kingpin/v2 v2.4.0 - github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/influxdata/influxdb v1.11.6 - github.com/prometheus/client_golang v1.20.2 - github.com/prometheus/common v0.57.0 + github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/common v0.60.0 github.com/prometheus/prometheus v0.53.1 github.com/stretchr/testify v1.9.0 ) @@ -26,6 +25,7 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dennwc/varint v1.0.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -55,11 +55,11 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.25.0 // indirect - golang.org/x/net v0.27.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/crypto v0.27.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect google.golang.org/grpc v1.65.0 // indirect diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index 1abeff7eb1..936b448d84 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -253,8 +253,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod 
h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg= -github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -264,8 +264,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= -github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= @@ -323,8 +323,8 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -344,20 +344,20 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod 
h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -373,17 +373,17 @@ golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go index 36242a8f4d..b02560dbab 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go @@ -16,19 +16,19 @@ package graphite import ( "bytes" "fmt" + "log/slog" "math" "net" "sort" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" ) // Client allows sending batches of Prometheus samples to Graphite. type Client struct { - logger log.Logger + logger *slog.Logger address string transport string @@ -37,9 +37,9 @@ type Client struct { } // NewClient creates a new Client. -func NewClient(logger log.Logger, address, transport string, timeout time.Duration, prefix string) *Client { +func NewClient(logger *slog.Logger, address, transport string, timeout time.Duration, prefix string) *Client { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ logger: logger, @@ -93,7 +93,7 @@ func (c *Client) Write(samples model.Samples) error { t := float64(s.Timestamp.UnixNano()) / 1e9 v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to Graphite, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to Graphite, skipping sample", "value", v, "sample", s) continue } fmt.Fprintf(&buf, "%s %f %f\n", k, v, t) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go index e84ed9e129..6ae40f8173 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go @@ -17,22 +17,22 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "math" "os" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/prompb" ) // Client allows sending batches of Prometheus samples to InfluxDB. type Client struct { - logger log.Logger + logger *slog.Logger client influx.Client database string @@ -41,16 +41,16 @@ type Client struct { } // NewClient creates a new Client. 
-func NewClient(logger log.Logger, conf influx.HTTPConfig, db, rp string) *Client { +func NewClient(logger *slog.Logger, conf influx.HTTPConfig, db, rp string) *Client { c, err := influx.NewHTTPClient(conf) // Currently influx.NewClient() *should* never return an error. if err != nil { - level.Error(logger).Log("err", err) + logger.Error("Error creating influx HTTP client", "err", err) os.Exit(1) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ @@ -84,7 +84,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) c.ignoredSamples.Inc() continue } diff --git a/documentation/examples/remote_storage/remote_storage_adapter/main.go b/documentation/examples/remote_storage/remote_storage_adapter/main.go index bb348aba7f..7f62990d2e 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/main.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/main.go @@ -17,6 +17,7 @@ package main import ( "fmt" "io" + "log/slog" "net/http" _ "net/http/pprof" "net/url" @@ -26,16 +27,14 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + "github.com/prometheus/common/promslog/flag" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/graphite" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/influxdb" @@ -57,7 +56,7 @@ type config struct { remoteTimeout time.Duration listenAddr string telemetryPath string - promlogConfig promlog.Config + promslogConfig promslog.Config } var ( @@ -105,11 +104,11 @@ func main() { cfg := parseFlags() http.Handle(cfg.telemetryPath, promhttp.Handler()) - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) writers, readers := buildClients(logger, cfg) if err := serve(logger, cfg.listenAddr, writers, readers); err != nil { - level.Error(logger).Log("msg", "Failed to listen", "addr", cfg.listenAddr, "err", err) + logger.Error("Failed to listen", "addr", cfg.listenAddr, "err", err) os.Exit(1) } } @@ -120,7 +119,7 @@ func parseFlags() *config { cfg := &config{ influxdbPassword: os.Getenv("INFLUXDB_PW"), - promlogConfig: promlog.Config{}, + promslogConfig: promslog.Config{}, } a.Flag("graphite-address", "The host:port of the Graphite server to send samples to. None, if empty."). @@ -146,7 +145,7 @@ func parseFlags() *config { a.Flag("web.telemetry-path", "Address to listen on for web endpoints."). 
Default("/metrics").StringVar(&cfg.telemetryPath) - flag.AddFlags(a, &cfg.promlogConfig) + flag.AddFlags(a, &cfg.promslogConfig) _, err := a.Parse(os.Args[1:]) if err != nil { @@ -168,19 +167,19 @@ type reader interface { Name() string } -func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { +func buildClients(logger *slog.Logger, cfg *config) ([]writer, []reader) { var writers []writer var readers []reader if cfg.graphiteAddress != "" { c := graphite.NewClient( - log.With(logger, "storage", "Graphite"), + logger.With("storage", "Graphite"), cfg.graphiteAddress, cfg.graphiteTransport, cfg.remoteTimeout, cfg.graphitePrefix) writers = append(writers, c) } if cfg.opentsdbURL != "" { c := opentsdb.NewClient( - log.With(logger, "storage", "OpenTSDB"), + logger.With("storage", "OpenTSDB"), cfg.opentsdbURL, cfg.remoteTimeout, ) @@ -189,7 +188,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { if cfg.influxdbURL != "" { url, err := url.Parse(cfg.influxdbURL) if err != nil { - level.Error(logger).Log("msg", "Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) + logger.Error("Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) os.Exit(1) } conf := influx.HTTPConfig{ @@ -199,7 +198,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { Timeout: cfg.remoteTimeout, } c := influxdb.NewClient( - log.With(logger, "storage", "InfluxDB"), + logger.With("storage", "InfluxDB"), conf, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy, @@ -208,15 +207,15 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { writers = append(writers, c) readers = append(readers, c) } - level.Info(logger).Log("msg", "Starting up...") + logger.Info("Starting up...") return writers, readers } -func serve(logger log.Logger, addr string, writers []writer, readers []reader) error { +func serve(logger *slog.Logger, addr string, writers []writer, readers []reader) error { http.HandleFunc("/write", func(w http.ResponseWriter, r *http.Request) { req, err := remote.DecodeWriteRequest(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -238,21 +237,21 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e http.HandleFunc("/read", func(w http.ResponseWriter, r *http.Request) { compressed, err := io.ReadAll(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } reqBuf, err := snappy.Decode(nil, compressed) if err != nil { - level.Error(logger).Log("msg", "Decode error", "err", err.Error()) + logger.Error("Decode error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } var req prompb.ReadRequest if err := proto.Unmarshal(reqBuf, &req); err != nil { - level.Error(logger).Log("msg", "Unmarshal error", "err", err.Error()) + logger.Error("Unmarshal error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -267,7 +266,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e var resp *prompb.ReadResponse resp, err = reader.Read(&req) if err != nil { - level.Warn(logger).Log("msg", "Error executing query", "query", req, "storage", reader.Name(), "err", err) + logger.Warn("Error executing query", "query", 
req, "storage", reader.Name(), "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -283,7 +282,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e compressed = snappy.Encode(nil, data) if _, err := w.Write(compressed); err != nil { - level.Warn(logger).Log("msg", "Error writing response", "storage", reader.Name(), "err", err) + logger.Warn("Error writing response", "storage", reader.Name(), "err", err) } }) @@ -309,12 +308,12 @@ func protoToSamples(req *prompb.WriteRequest) model.Samples { return samples } -func sendSamples(logger log.Logger, w writer, samples model.Samples) { +func sendSamples(logger *slog.Logger, w writer, samples model.Samples) { begin := time.Now() err := w.Write(samples) duration := time.Since(begin).Seconds() if err != nil { - level.Warn(logger).Log("msg", "Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) + logger.Warn("Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) failedSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) } sentSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go index abb1d0b7d3..433c70527a 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go @@ -19,13 +19,12 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "math" "net/http" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" ) @@ -36,14 +35,14 @@ const ( // Client allows sending batches of Prometheus samples to OpenTSDB. type Client struct { - logger log.Logger + logger *slog.Logger url string timeout time.Duration } // NewClient creates a new Client. 
-func NewClient(logger log.Logger, url string, timeout time.Duration) *Client { +func NewClient(logger *slog.Logger, url string, timeout time.Duration) *Client { return &Client{ logger: logger, url: url, @@ -78,7 +77,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) continue } metric := TagValue(s.Metric[model.MetricNameLabel]) diff --git a/go.mod b/go.mod index 845e3277b8..3399ffb002 100644 --- a/go.mod +++ b/go.mod @@ -17,23 +17,21 @@ require ( github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 github.com/cespare/xxhash/v2 v2.3.0 github.com/dennwc/varint v1.0.0 - github.com/digitalocean/godo v1.122.0 - github.com/docker/docker v27.2.0+incompatible + github.com/digitalocean/godo v1.126.0 + github.com/docker/docker v27.3.1+incompatible github.com/edsrzf/mmap-go v1.1.0 github.com/envoyproxy/go-control-plane v0.13.0 github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fsnotify/fsnotify v1.7.0 - github.com/go-kit/log v0.2.1 - github.com/go-logfmt/logfmt v0.6.0 github.com/go-openapi/strfmt v0.23.0 - github.com/go-zookeeper/zk v1.0.3 + github.com/go-zookeeper/zk v1.0.4 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/google/go-cmp v0.6.0 github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da github.com/google/uuid v1.6.0 - github.com/gophercloud/gophercloud v1.14.0 + github.com/gophercloud/gophercloud v1.14.1 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc github.com/grpc-ecosystem/grpc-gateway v1.16.0 github.com/hashicorp/consul/api v1.29.4 @@ -41,9 +39,9 @@ require ( github.com/hetznercloud/hcloud-go/v2 v2.13.1 github.com/ionos-cloud/sdk-go/v6 v6.2.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.10 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.40.0 + github.com/linode/linodego v1.41.0 github.com/miekg/dns v1.1.62 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f @@ -52,52 +50,52 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.3 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.59.1 + github.com/prometheus/common v0.60.0 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 - github.com/prometheus/exporter-toolkit v0.12.0 + github.com/prometheus/exporter-toolkit v0.13.0 github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.9.0 github.com/vultr/govultr/v2 v2.17.2 - go.opentelemetry.io/collector/pdata v1.14.1 - go.opentelemetry.io/collector/semconv v0.108.1 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 - go.opentelemetry.io/otel v1.29.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 - 
go.opentelemetry.io/otel/sdk v1.29.0 - go.opentelemetry.io/otel/trace v1.29.0 + go.opentelemetry.io/collector/pdata v1.16.0 + go.opentelemetry.io/collector/semconv v0.110.0 + go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/otel v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 + go.opentelemetry.io/otel/sdk v1.30.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 - go.uber.org/automaxprocs v1.5.3 + go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 golang.org/x/oauth2 v0.23.0 golang.org/x/sync v0.8.0 - golang.org/x/sys v0.25.0 - golang.org/x/text v0.18.0 - golang.org/x/time v0.6.0 - golang.org/x/tools v0.24.0 - google.golang.org/api v0.196.0 - google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed - google.golang.org/grpc v1.66.0 + golang.org/x/sys v0.26.0 + golang.org/x/text v0.19.0 + golang.org/x/tools v0.26.0 + google.golang.org/api v0.199.0 + google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 + google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.34.2 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.31.0 - k8s.io/apimachinery v0.31.0 - k8s.io/client-go v0.31.0 + k8s.io/api v0.31.1 + k8s.io/apimachinery v0.31.1 + k8s.io/client-go v0.31.1 k8s.io/klog v1.0.0 k8s.io/klog/v2 v2.130.1 ) require ( - cloud.google.com/go/auth v0.9.3 // indirect + cloud.google.com/go/auth v0.9.5 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect @@ -106,7 +104,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cilium/ebpf v0.11.0 // indirect - github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect + github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect github.com/containerd/cgroups/v3 v3.0.3 // indirect github.com/containerd/log v0.1.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect @@ -119,7 +117,6 @@ require ( github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/ghodss/yaml v1.0.0 // indirect - github.com/go-kit/kit v0.12.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/analysis v0.22.2 // indirect @@ -133,14 +130,14 @@ require ( github.com/go-resty/resty/v2 v2.13.1 // indirect github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect - github.com/golang/glog v1.2.1 // indirect + github.com/golang/glog v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/s2a-go v0.1.8 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.3 // indirect + 
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect @@ -188,13 +185,14 @@ require ( github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.28.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect - golang.org/x/mod v0.20.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/term v0.23.0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/time v0.6.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect @@ -207,11 +205,6 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -replace ( - k8s.io/klog => github.com/simonpasquier/klog-gokit v0.3.0 - k8s.io/klog/v2 => github.com/simonpasquier/klog-gokit/v3 v3.5.0 -) - // Exclude linodego v1.0.0 as it is no longer published on github. exclude github.com/linode/linodego v1.0.0 diff --git a/go.sum b/go.sum index edb5b650bd..1dce748ba7 100644 --- a/go.sum +++ b/go.sum @@ -12,8 +12,8 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U= -cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk= +cloud.google.com/go/auth v0.9.5 h1:4CTn43Eynw40aFVr3GpPqsQponx2jv0BQpjvajsbbzw= +cloud.google.com/go/auth v0.9.5/go.mod h1:Xo0n7n66eHyOWWCnitop6870Ilwo3PiZyodVkkH1xWM= cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= @@ -22,8 +22,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= -cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= +cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/pubsub v1.0.1/go.mod 
h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= @@ -61,13 +61,8 @@ github.com/Code-Hex/go-generics-cache v1.5.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1v github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/KimMachineGun/automemlimit v0.6.1 h1:ILa9j1onAAMadBsyyUJv5cack8Y1WT26yLj/V+ulKp8= github.com/KimMachineGun/automemlimit v0.6.1/go.mod h1:T7xYht7B8r6AG/AqFcUdc7fzd2bIdBKmepfP2S1svPY= -github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -78,23 +73,17 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 h1:t3eaIm0rUkzbrIewtiFmMK5RXHej2XnoXNhxVsAYUfg= github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= -github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod 
h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -102,8 +91,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= -github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -117,24 +104,16 @@ github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= -github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 
v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -142,29 +121,22 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/digitalocean/godo v1.122.0 h1:ziytLQi8QKtDp2K1A+YrYl2dWLHLh2uaMzWvcz9HkKg= -github.com/digitalocean/godo v1.122.0/go.mod h1:WQVH83OHUy6gC4gXpEVQKtxTd4L5oCp+5OialidkPLY= +github.com/digitalocean/godo v1.126.0 h1:+Znh7VMQj/E8ArbjWnc7OKGjWfzC+I8OCSRp7r1MdD8= +github.com/digitalocean/godo v1.126.0/go.mod h1:PU8JB6I1XYkQIdHFop8lLAY9ojp6M0XcU0TWaQSxbrc= github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/docker v27.2.0+incompatible h1:Rk9nIVdfH3+Vz4cyI/uhbINhEZ/oLmc+CBXmH6fbNk4= -github.com/docker/docker v27.2.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI= +github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane 
v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -182,11 +154,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -198,17 +167,11 @@ github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= -github.com/go-kit/kit v0.12.0 h1:e4o3o3IsBfAKQh5Qbbiqyfu97Ku7jrO/JbohvztANh4= -github.com/go-kit/kit v0.12.0/go.mod h1:lHd+EkCZPIwYItmGDDRdhinkzX2A1sj+M9biaEaizzs= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= -github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -234,26 +197,21 @@ github.com/go-openapi/validate v0.23.0 h1:2l7PJLzCis4YUGEoW6eoQw3WhyM65WSIcjX6SQ github.com/go-openapi/validate v0.23.0/go.mod h1:EeiAZ5bmpSIOJV1WLfyYF9qp/B1ZgSaEpHTJHtN5cbE= github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g= github.com/go-resty/resty/v2 v2.13.1/go.mod h1:GznXlLxkq6Nh4sU59rPmUw3VtgpO3aS96ORAI6Q7d+0= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= 
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/go-zookeeper/zk v1.0.3 h1:7M2kwOsc//9VeeFiPtf+uSJlVpU66x9Ba5+8XK7/TDg= -github.com/go-zookeeper/zk v1.0.3/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= +github.com/go-zookeeper/zk v1.0.4 h1:DPzxraQx7OrPyXq2phlGlNSIyWEsAox0RJmjTseMV6I= +github.com/go-zookeeper/zk v1.0.4/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= -github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= +github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -282,7 +240,6 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -324,40 +281,29 @@ github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da/go.mod h1:K1liHPHnj73 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.3.3 h1:QRje2j5GZimBzlbhGA2V2QlGNgL8G6e+wGo/+/2bWI0= -github.com/googleapis/enterprise-certificate-proxy v0.3.3/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= -github.com/gophercloud/gophercloud v1.14.0 h1:Bt9zQDhPrbd4qX7EILGmy+i7GP35cc+AAL2+wIJpUE8= -github.com/gophercloud/gophercloud v1.14.0/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gophercloud/gophercloud v1.14.1 h1:DTCNaTVGl8/cFu58O1JwWgis9gtISAFONqpMKNg/Vpw= +github.com/gophercloud/gophercloud v1.14.1/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= -github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/api v1.29.4 h1:P6slzxDLBOxUSj3fWo2o65VuKtbtOXFi7TSSgtXutuE= github.com/hashicorp/consul/api v1.29.4/go.mod h1:HUlfw+l2Zy68ceJavv2zAyArl2fqhGWnMycyt56sBgg= github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0= github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg= 
-github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg= github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s= github.com/hashicorp/cronexpr v1.1.2 h1:wG/ZYIKT+RT3QkOdgYc+xsKWVRgnxJ1OJtjjy84fJ9A= @@ -366,7 +312,6 @@ github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= @@ -384,7 +329,6 @@ github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9 github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= @@ -395,51 +339,38 @@ github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4= github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM= github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0= github.com/hashicorp/nomad/api 
v0.0.0-20240717122358-3d93bd3778f3 h1:fgVfQ4AC1avVOnu2cfms8VAiD8lUq3vWI8mTocOXN/w= github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3/go.mod h1:svtxn6QnrQ69P23VvIWMR34tg3vmwLz4UdUzm1dSCgE= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= github.com/hetznercloud/hcloud-go/v2 v2.13.1 h1:jq0GP4QaYE5d8xR/Zw17s9qoaESRJMXfGmtD1a/qckQ= github.com/hetznercloud/hcloud-go/v2 v2.13.1/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ionos-cloud/sdk-go/v6 v6.2.1 h1:mxxN+frNVmbFrmmFfXnBC3g2USYJrl6mc1LW2iNYbFY= github.com/ionos-cloud/sdk-go/v6 v6.2.1/go.mod h1:SXrO9OGyWjd2rZhAhEpdYN6VUAODzzqRdqA9BCviQtI= github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww= github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -447,15 +378,13 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= +github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -470,11 +399,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= -github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.40.0 h1:7ESY0PwK94hoggoCtIroT1Xk6b1flrFBNZ6KwqbTqlI= -github.com/linode/linodego v1.40.0/go.mod h1:NsUw4l8QrLdIofRg1NYFBbW5ZERnmbZykVBszPZLORM= -github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/linode/linodego v1.41.0 h1:GcP7JIBr9iLRJ9FwAtb9/WCT1DuPJS/xUApapfdjtiY= +github.com/linode/linodego v1.41.0/go.mod h1:Ow4/XZ0yvWBzt3iAHwchvhSx30AyLintsSMvvQ2/SJY= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= @@ -485,7 +411,6 @@ github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -493,7 +418,6 @@ github.com/mattn/go-isatty 
v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g= github.com/maxatome/go-testdeep v1.12.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM= @@ -501,23 +425,16 @@ github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ= github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= @@ -538,64 +455,35 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= 
-github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= -github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 h1:dOYG7LS/WK00RWZc8XGgcUTlTxpp3mKhdR2Q9z9HbXM= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= -github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= -github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= -github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= -github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= -github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= -github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= -github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= github.com/ovh/go-ovh v1.6.0 
h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -608,54 +496,43 @@ github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4= -github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= 
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= -github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= -github.com/prometheus/exporter-toolkit v0.12.0 h1:DkE5RcEZR3lQA2QD5JLVQIf41dFKNsVMXFhgqcif7fo= -github.com/prometheus/exporter-toolkit v0.12.0/go.mod h1:fQH0KtTn0yrrS0S82kqppRjDDiwMfIQUwT+RBRRhwUc= +github.com/prometheus/exporter-toolkit v0.13.0 h1:lmA0Q+8IaXgmFRKw09RldZmZdnvu9wwcDLIXGmTPw1c= +github.com/prometheus/exporter-toolkit v0.13.0/go.mod h1:2uop99EZl80KdXhv/MxVI2181fMcwlsumFOqBecGkG0= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod 
h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 h1:yoKAVkEVwAqbGbR8n87rHQ1dulL25rKloGadb3vm770= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30/go.mod h1:sH0u6fq6x4R5M7WxkoQFY/o7UaiItec0o1LinLCJNq8= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= @@ -664,28 +541,14 @@ github.com/shoenig/test v1.7.1 h1:UJcjSAI3aUKx52kfcfhblgyhZceouhvvs3OYdWgn+PY= github.com/shoenig/test v1.7.1/go.mod h1:UxJ6u/x2v/TNs/LoLxBNJRV9DiwBBKYxXSyczsBHFoI= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c/go.mod h1:owqhoLW1qZoYLZzLnBw+QkPP9WZnjlSWihhxAJC1+/M= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/simonpasquier/klog-gokit v0.3.0 h1:TkFK21cbwDRS+CiystjqbAiq5ubJcVTk9hLUck5Ntcs= -github.com/simonpasquier/klog-gokit v0.3.0/go.mod h1:+SUlDQNrhVtGt2FieaqNftzzk8P72zpWlACateWxA9k= -github.com/simonpasquier/klog-gokit/v3 v3.5.0 h1:ewnk+ickph0hkQFgdI4pffKIbruAxxWcg0Fe/vQmLOM= -github.com/simonpasquier/klog-gokit/v3 v3.5.0/go.mod h1:S9flvRzzpaYLYtXI2w8jf9R/IU/Cy14NrbvDUevNP1E= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod 
h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -703,28 +566,20 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vultr/govultr/v2 v2.17.2 h1:gej/rwr91Puc/tgh+j33p/BLR16UrIPnSr+AIwYWZQs= github.com/vultr/govultr/v2 v2.17.2/go.mod h1:ZFOKGWmgjytfyjeyAdhQlSWwTjh2ig+X49cAp50dzXI= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80= go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -732,49 +587,42 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/pdata v1.14.1 h1:wXZjtQA7Vy5HFqco+yA95ENyMQU5heBB1IxMHQf6mUk= -go.opentelemetry.io/collector/pdata v1.14.1/go.mod 
h1:z1dTjwwtcoXxZx2/nkHysjxMeaxe9pEmYTEr4SMNIx8= -go.opentelemetry.io/collector/semconv v0.108.1 h1:Txk9tauUnamZaxS5vlf1O0uZ4VD6nioRBR0nX8L/fU4= -go.opentelemetry.io/collector/semconv v0.108.1/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= -go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= -go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 h1:nSiV3s7wiCam610XcLbYOmMfJxB9gO4uK3Xgv5gmTgg= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0/go.mod h1:hKn/e/Nmd19/x1gvIHwtOwVWM+VhuITSWip3JUDghj0= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 h1:JAv0Jwtl01UFiyWZEMiJZBiTlv5A50zNs8lsthXqIio= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0/go.mod h1:QNKLmUEAq2QUbPQUfvw4fmv0bgbK7UlOSFCnXyfvSNc= -go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= -go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= -go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo= -go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok= -go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= -go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/collector/pdata v1.16.0 h1:g02K8jlRnmQ7TQDuXpdgVL6vIxIVqr5Gbb1qIR27rto= +go.opentelemetry.io/collector/pdata v1.16.0/go.mod h1:YZZJIt2ehxosYf/Y1pbvexjNWsIGNNrzzlCTO9jC1F4= +go.opentelemetry.io/collector/semconv v0.110.0 h1:KHQnOHe3gUz0zsxe8ph9kN5OTypCFD4V+06AiBTfeNk= +go.opentelemetry.io/collector/semconv v0.110.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 h1:m0yTiGDLUvVYaTFbAvCkVYIYcvwKt3G7OLoN77NUs/8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0/go.mod h1:wBQbT4UekBfegL2nx0Xk1vBcnzyBPsIVm9hRG4fYcr4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 
h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0/go.mod h1:4lVs6obhSVRb1EW5FhOuBTyiQhtRtAnnva9vD3yRfq8= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE= +go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= -go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8= -go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -782,8 +630,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.19.0/go.mod 
h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -818,17 +666,12 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -839,7 +682,6 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -865,8 +707,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -893,11 +735,7 @@ golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -908,13 +746,11 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -955,16 +791,16 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -976,24 +812,20 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= -golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools 
v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -1005,8 +837,6 @@ golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1014,7 +844,6 @@ golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= @@ -1035,13 +864,12 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod 
h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -1056,10 +884,9 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.196.0 h1:k/RafYqebaIJBO3+SMnfEGtFVlvp5vSgqTUF54UN/zg= -google.golang.org/api v0.196.0/go.mod h1:g9IL21uGkYgvQ5BZg6BAtoGJQIm8r6EgaAbpNey5wBE= +google.golang.org/api v0.199.0 h1:aWUXClp+VFJmqE0JPvpZOK3LDQMyFKYIow4etYd9qxs= +google.golang.org/api v0.199.0/go.mod h1:ohG4qSztDJmZdjK/Ar6MhbAmb/Rpi4JHOqagsh90K28= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= @@ -1070,7 +897,6 @@ google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= @@ -1094,19 +920,14 @@ google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1m google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api 
v0.0.0-20240827150818-7e3bb234dfed h1:3RgNmBoI9MZhsj3QxC+AP/qQhNwpCLOvYDYYsFrhFt0= -google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= @@ -1116,8 +937,8 @@ google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3Iji google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c= -google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1137,20 +958,13 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod 
h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -1166,7 +980,6 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -1174,12 +987,16 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo= -k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE= -k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc= -k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= -k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= -k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= +k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= +k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= +k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= +k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= +k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= +k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 
h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= @@ -1191,7 +1008,5 @@ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMm sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= -sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 2a37ea66d4..a6ad47acd3 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -230,6 +230,17 @@ func (h *FloatHistogram) TestExpression() string { res = append(res, fmt.Sprintf("custom_values:%g", m.CustomValues)) } + switch m.CounterResetHint { + case UnknownCounterReset: + // Unknown is the default, don't add anything. + case CounterReset: + res = append(res, "counter_reset_hint:reset") + case NotCounterReset: + res = append(res, "counter_reset_hint:not_reset") + case GaugeType: + res = append(res, "counter_reset_hint:gauge") + } + addBuckets := func(kind, bucketsKey, offsetKey string, buckets []float64, spans []Span) []string { if len(spans) > 1 { panic(fmt.Sprintf("histogram with multiple %s spans not supported", kind)) @@ -293,6 +304,14 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar h.Sum /= scalar + // Division by zero removes all buckets. + if scalar == 0 { + h.PositiveBuckets = nil + h.NegativeBuckets = nil + h.PositiveSpans = nil + h.NegativeSpans = nil + return h + } for i := range h.PositiveBuckets { h.PositiveBuckets[i] /= scalar } @@ -342,7 +361,7 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (*FloatHistogram, error) { default: // All other cases shouldn't actually happen. // They are a direct collision of CounterReset and NotCounterReset. - // Conservatively set the CounterResetHint to "unknown" and isse a warning. + // Conservatively set the CounterResetHint to "unknown" and issue a warning. h.CounterResetHint = UnknownCounterReset // TODO(trevorwhitney): Actually issue the warning as soon as the plumbing for it is in place } @@ -658,7 +677,7 @@ func detectReset(currIt, prevIt *floatBucketIterator) bool { if !currIt.Next() { // Reached end of currIt early, therefore // previous histogram has a bucket that the - // current one does not have. Unlass all + // current one does not have. Unless all // remaining buckets in the previous histogram // are unpopulated, this is a reset. for { @@ -891,7 +910,7 @@ func (h *FloatHistogram) trimBucketsInZeroBucket() { // reconcileZeroBuckets finds a zero bucket large enough to include the zero // buckets of both histograms (the receiving histogram and the other histogram) // with a zero threshold that is not within a populated bucket in either -// histogram. This method modifies the receiving histogram accourdingly, but +// histogram. 
This method modifies the receiving histogram accordingly, but // leaves the other histogram as is. Instead, it returns the zero count the // other histogram would have if it were modified. func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 { diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 1558a6d679..34988e9d39 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -131,6 +131,54 @@ func TestFloatHistogramMul(t *testing.T) { NegativeBuckets: []float64{9, 3, 15, 18}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -11, + Count: -30, + Sum: -23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-1, 0, -3, -4, -7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3, -1, -5, -6}, + }, + }, + { + "negative multiplier", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -22, + Count: -60, + Sum: -46, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-2, 0, -6, -8, -14}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-6, -2, -10, -12}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ @@ -351,14 +399,10 @@ func TestFloatHistogramDiv(t *testing.T) { }, 0, &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: math.Inf(1), - Count: math.Inf(1), - Sum: math.Inf(1), - PositiveSpans: []Span{{-2, 1}, {2, 3}}, - PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, + ZeroThreshold: 0.01, + Count: math.Inf(1), + Sum: math.Inf(1), + ZeroCount: math.Inf(1), }, }, { @@ -409,6 +453,54 @@ func TestFloatHistogramDiv(t *testing.T) { NegativeBuckets: []float64{1.5, 0.5, 2.5, 3}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 5.5, + Count: 3493.3, + Sum: 2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{1, 3.3, 4.2, 0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -3493.3, + Sum: -2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{-1, -3.3, -4.2, -0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3.1, -3, -1.234e5, -1000}, + }, + }, + { + "negative half", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -15, + Sum: -11.5, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-0.5, 0, -1.5, -2, -3.5}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + 
NegativeBuckets: []float64{-1.5, -0.5, -2.5, -3}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go index d7bdc1e076..99529a3836 100644 --- a/model/labels/labels_common.go +++ b/model/labels/labels_common.go @@ -230,5 +230,5 @@ func contains(s []Label, n string) bool { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index c8bce51234..c64bb990e0 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -16,7 +16,6 @@ package labels import ( - "reflect" "slices" "strings" "unsafe" @@ -299,10 +298,8 @@ func Equal(ls, o Labels) bool { func EmptyLabels() Labels { return Labels{} } -func yoloBytes(s string) (b []byte) { - *(*string)(unsafe.Pointer(&b)) = s - (*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s) - return +func yoloBytes(s string) []byte { + return unsafe.Slice(unsafe.StringData(s), len(s)) } // New returns a sorted Labels from the given labels. @@ -338,8 +335,8 @@ func Compare(a, b Labels) int { } i := 0 // First, go 8 bytes at a time. Data strings are expected to be 8-byte aligned. - sp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&shorter)).Data) - lp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&longer)).Data) + sp := unsafe.Pointer(unsafe.StringData(shorter)) + lp := unsafe.Pointer(unsafe.StringData(longer)) for ; i < len(shorter)-8; i += 8 { if *(*uint64)(unsafe.Add(sp, i)) != *(*uint64)(unsafe.Add(lp, i)) { break diff --git a/model/labels/regexp.go b/model/labels/regexp.go index d2151d83dd..3df9435194 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -63,13 +63,13 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { // available, even if the string matcher is faster. m.matchString = m.stringMatcher.Matches } else { - parsed, err := syntax.Parse(v, syntax.Perl) + parsed, err := syntax.Parse(v, syntax.Perl|syntax.DotNL) if err != nil { return nil, err } // Simplify the syntax tree to run faster. 
parsed = parsed.Simplify() - m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$") + m.re, err = regexp.Compile("^(?s:" + parsed.String() + ")$") if err != nil { return nil, err } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 24875e64ef..8df0dbb023 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -121,7 +121,7 @@ func TestFastRegexMatcher_MatchString(t *testing.T) { t.Parallel() m, err := NewFastRegexMatcher(r) require.NoError(t, err) - re := regexp.MustCompile("^(?:" + r + ")$") + re := regexp.MustCompile("^(?s:" + r + ")$") require.Equal(t, re.MatchString(v), m.MatchString(v)) }) } @@ -167,7 +167,7 @@ func TestOptimizeConcatRegex(t *testing.T) { } for _, c := range cases { - parsed, err := syntax.Parse(c.regex, syntax.Perl) + parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL) require.NoError(t, err) prefix, suffix, contains := optimizeConcatRegex(parsed) @@ -248,7 +248,7 @@ func TestFindSetMatches(t *testing.T) { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches, actualCaseSensitive := findSetMatches(parsed) require.Equal(t, c.expMatches, matches) @@ -348,15 +348,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { pattern string exp StringMatcher }{ - {".*", anyStringWithoutNewlineMatcher{}}, - {".*?", anyStringWithoutNewlineMatcher{}}, + {".*", trueMatcher{}}, + {".*?", trueMatcher{}}, {"(?s:.*)", trueMatcher{}}, - {"(.*)", anyStringWithoutNewlineMatcher{}}, - {"^.*$", anyStringWithoutNewlineMatcher{}}, - {".+", &anyNonEmptyStringMatcher{matchNL: false}}, + {"(.*)", trueMatcher{}}, + {"^.*$", trueMatcher{}}, + {".+", &anyNonEmptyStringMatcher{matchNL: true}}, {"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}}, - {"^.+$", &anyNonEmptyStringMatcher{matchNL: false}}, - {"(.+)", &anyNonEmptyStringMatcher{matchNL: false}}, + {"^.+$", &anyNonEmptyStringMatcher{matchNL: true}}, + {"(.+)", &anyNonEmptyStringMatcher{matchNL: true}}, {"", emptyStringMatcher{}}, {"^$", emptyStringMatcher{}}, {"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}}, @@ -366,23 +366,23 @@ func TestStringMatcherFromRegexp(t *testing.T) { {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, - {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: 
&anyNonEmptyStringMatcher{matchNL: false}, right: anyStringWithoutNewlineMatcher{}}}, - {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, + {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}}, + {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, {"10\\.0\\.(1|2)\\.+", nil}, - {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, - {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}}, - {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, - {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, - {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: nil}}, + 
{"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{matchNL: true}, suffix: "foo", suffixCaseSensitive: true}}, + {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: trueMatcher{}}}, + {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, + {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: true}}})}, + {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: nil}}, // we don't support case insensitive matching for contains. // This is because there's no strings.IndexOfFold function. // We can revisit later if this is really popular by using strings.ToUpper. @@ -393,15 +393,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {".*foo.*bar.*", nil}, {`\d*`, nil}, {".", nil}, - {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}}, + {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}}, // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // It would make the code too complex to handle it. {"(.+)/(foo.*|bar$)", nil}, // Case sensitive alternate with same literal prefix and .* suffix. - {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: trueMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: trueMatcher{}}}}}, // Case insensitive alternate with same literal prefix and .* suffix. 
- {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, - {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, + {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, // Concatenated variable length selectors are not supported. {"foo.*.*", nil}, {"foo.+.+", nil}, @@ -410,15 +410,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"aaa.?.?", nil}, {"aaa.?.*", nil}, // Regexps with ".?". - {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, + {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, - {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, + {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}}, {"f.?o", nil}, } { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches := stringMatcherFromRegexp(parsed) require.Equal(t, c.exp, matches) @@ -437,16 +437,16 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { { pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. 
{ pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX"}, - expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp"}, }, // Nested literal prefixes, case sensitive. @@ -474,13 +474,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher. numPrefixMatchers := 0 @@ -523,16 +523,16 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { { pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. { pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Nested literal suffixes, case sensitive. @@ -552,13 +552,13 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalSuffixStringMatcher. 
numSuffixMatchers := 0 @@ -598,26 +598,26 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "test.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "test!"}, - expectedNotMatches: []string{"test\n", "tes", "test!!"}, + expectedMatches: []string{"test\n", "test", "test!"}, + expectedNotMatches: []string{"tes", "test!!"}, }, { pattern: ".?test", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "!test"}, - expectedNotMatches: []string{"\ntest", "tes", "test!"}, + expectedMatches: []string{"\ntest", "test", "!test"}, + expectedNotMatches: []string{"tes", "test!"}, }, { pattern: "(aaa.?|bbb.?)", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"}, - expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"}, + expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX", "aaa\n", "bbb\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "bb", "bbbXX"}, }, { pattern: ".*aaa.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"}, - expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"}, + expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX", "XXXaaa\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX"}, }, // Match newline. @@ -632,18 +632,18 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "(aaa.?|((?s).?bbb.+))", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"}, - expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"}, + expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX", "aaa\n"}, + expectedNotMatches: []string{"aa", "Xbbb", "\nbbb"}, }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher. numZeroOrOneMatchers := 0 @@ -1112,7 +1112,7 @@ func BenchmarkOptimizeEqualOrPrefixStringMatchers(b *testing.B) { } b.Logf("regexp: %s", re) - parsed, err := syntax.Parse(re, syntax.Perl) + parsed, err := syntax.Parse(re, syntax.Perl|syntax.DotNL) require.NoError(b, err) unoptimized := stringMatcherFromRegexpInternal(parsed) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index a880465969..eb79f7be21 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -171,7 +171,7 @@ type Regexp struct { // NewRegexp creates a new anchored Regexp and returns an error if the // passed-in regular expression does not compile. func NewRegexp(s string) (Regexp, error) { - regex, err := regexp.Compile("^(?:" + s + ")$") + regex, err := regexp.Compile("^(?s:" + s + ")$") return Regexp{Regexp: regex}, err } @@ -218,8 +218,8 @@ func (re Regexp) String() string { } str := re.Regexp.String() - // Trim the anchor `^(?:` prefix and `)$` suffix. - return str[4 : len(str)-2] + // Trim the anchor `^(?s:` prefix and `)$` suffix. + return str[5 : len(str)-2] } // Process returns a relabeled version of the given label set. The relabel configurations @@ -277,6 +277,13 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return false } case Replace: + // Fast path to add or delete label pair. 
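+ // For example, a rule that sets only "target_label: env" and "replacement: prod" has an empty source value, keeps the default regex and contains no "$" references, so the label can be set directly without running the regex engine.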
+ if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && + !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { + lb.Set(cfg.TargetLabel, cfg.Replacement) + break + } + indexes := cfg.Regex.FindStringSubmatchIndex(val) // If there is no match no replacement must take place. if indexes == nil { @@ -326,3 +333,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return true } + +func varInRegexTemplate(template string) bool { + return strings.Contains(template, "$") +} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index fc9952134d..0c6d41f5e3 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -569,6 +569,29 @@ func TestRelabel(t *testing.T) { }, drop: true, }, + { + input: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + }), + relabel: []*Config{ + { + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("line1.*line2"), + TargetLabel: "d", + Separator: ";", + Replacement: "match${1}", + Action: Replace, + }, + }, + output: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + "d": "match", + }), + }, } for _, test := range tests { @@ -838,6 +861,34 @@ func BenchmarkRelabel(b *testing.B) { "__scrape_timeout__", "10s", "job", "kubernetes-pods"), }, + { + name: "static label pair", + config: ` + - replacement: wwwwww + target_label: wwwwww + - replacement: yyyyyyyyyyyy + target_label: xxxxxxxxx + - replacement: xxxxxxxxx + target_label: yyyyyyyyyyyy + - source_labels: ["something"] + target_label: with_source_labels + replacement: value + - replacement: dropped + target_label: ${0} + - replacement: ${0} + target_label: dropped`, + lbls: labels.FromStrings( + "abcdefg01", "hijklmn1", + "abcdefg02", "hijklmn2", + "abcdefg03", "hijklmn3", + "abcdefg04", "hijklmn4", + "abcdefg05", "hijklmn5", + "abcdefg06", "hijklmn6", + "abcdefg07", "hijklmn7", + "abcdefg08", "hijklmn8", + "job", "foo", + ), + }, } for i := range tests { err := yaml.UnmarshalStrict([]byte(tests[i].config), &tests[i].cfgs) diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index bfb85ce740..ef6ac17fe3 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -111,6 +111,20 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { ) } + for k, v := range g.Labels { + if !model.LabelName(k).IsValid() || k == model.MetricNameLabel { + errs = append( + errs, fmt.Errorf("invalid label name: %s", k), + ) + } + + if !model.LabelValue(v).IsValid() { + errs = append( + errs, fmt.Errorf("invalid label value: %s", v), + ) + } + } + set[g.Name] = struct{}{} for i, r := range g.Rules { @@ -136,11 +150,12 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - QueryOffset *model.Duration `yaml:"query_offset,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } // Rule describes an alerting or recording rule. 
diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index ef5008f4bf..73ea174594 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -85,9 +85,8 @@ func TestParseFileFailure(t *testing.T) { for _, c := range table { _, errs := ParseFile(filepath.Join("testdata", c.filename)) - require.NotNil(t, errs, "Expected error parsing %s but got none", c.filename) - require.Error(t, errs[0]) - require.Containsf(t, errs[0].Error(), c.errMsg, "Expected error for %s.", c.filename) + require.NotEmpty(t, errs, "Expected error parsing %s but got none", c.filename) + require.ErrorContainsf(t, errs[0], c.errMsg, "Expected error for %s.", c.filename) } } @@ -108,6 +107,23 @@ groups: severity: "page" annotations: summary: "Instance {{ $labels.instance }} down" +`, + shouldPass: true, + }, + { + ruleString: ` +groups: +- name: example + labels: + team: myteam + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: "page" + annotations: + summary: "Instance {{ $labels.instance }} down" `, shouldPass: true, }, @@ -259,8 +275,7 @@ func TestError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.error.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.error, tt.want) }) } } @@ -308,8 +323,7 @@ func TestWrappedError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.wrappedError.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.wrappedError, tt.want) }) } } diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go new file mode 100644 index 0000000000..bd0d5089ac --- /dev/null +++ b/model/textparse/benchmark_test.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/labels" + + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +type newParser func([]byte, *labels.SymbolTable) Parser + +var newTestParserFns = map[string]newParser{ + "promtext": NewPromParser, + "promproto": func(b []byte, st *labels.SymbolTable) Parser { + return NewProtobufParser(b, true, st) + }, + "omtext": func(b []byte, st *labels.SymbolTable) Parser { + return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + }, + "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { + p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + return NewNHCBParser(p, st, false) + }, +} + +// BenchmarkParse benchmarks parsing, mimicking how scrape/scrape.go#append uses it. +// Typically used as follows: +/* + export bench=v1 && go test ./model/textparse/... 
\ + -run '^$' -bench '^BenchmarkParse' \ + -benchtime 2s -count 6 -cpu 2 -benchmem -timeout 999m \ + | tee ${bench}.txt +*/ +// For profiles, add -memprofile=${bench}.mem.pprof -cpuprofile=${bench}.cpu.pprof +// options. +// +// NOTE(bwplotka): Previous iterations of this benchmark had different cases for isolated +// Series, Series+Metrics with and without reuse, Series+CT. Those cases are sometimes +// good to know if you are working on a certain optimization, but it does not +// make sense to persist such cases for everybody (e.g. for CI one day). +// For local iteration, feel free to adjust cases/comment out code etc. +// +// NOTE(bwplotka): Do not try to conclude "what parser (OM, proto, prom) is the fastest" +// as the testdata has different amount and type of metrics and features (e.g. exemplars). +func BenchmarkParse(b *testing.B) { + for _, bcase := range []struct { + dataFile string // Localized to "./testdata". + dataProto []byte + parser string + + compareToExpfmtFormat expfmt.FormatType + }{ + {dataFile: "promtestdata.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + {dataFile: "promtestdata.nometa.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeProtoDelim as expfmt does not support GAUGE_HISTOGRAM, see https://github.com/prometheus/common/issues/430. + {dataProto: createTestProtoBuf(b).Bytes(), parser: "promproto"}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeOpenMetrics as expfmt does not support OM exemplars, see https://github.com/prometheus/common/issues/703. + {dataFile: "omtestdata.txt", parser: "omtext"}, + {dataFile: "promtestdata.txt", parser: "omtext"}, // Compare how omtext parser deals with Prometheus text format vs promtext. + + // NHCB. + {dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. + {dataFile: "omhistogramdata.txt", parser: "omtext_with_nhcb"}, // Measure NHCB over OM parser. 
+ } { + var buf []byte + dataCase := bcase.dataFile + if len(bcase.dataProto) > 0 { + dataCase = "createTestProtoBuf()" + buf = bcase.dataProto + } else { + f, err := os.Open(filepath.Join("testdata", bcase.dataFile)) + require.NoError(b, err) + b.Cleanup(func() { + _ = f.Close() + }) + buf, err = io.ReadAll(f) + require.NoError(b, err) + } + b.Run(fmt.Sprintf("data=%v/parser=%v", dataCase, bcase.parser), func(b *testing.B) { + newParserFn := newTestParserFns[bcase.parser] + var ( + res labels.Labels + e exemplar.Exemplar + ) + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + st := labels.NewSymbolTable() + for i := 0; i < b.N; i++ { + p := newParserFn(buf, st) + + Inner: + for { + t, err := p.Next() + switch t { + case EntryInvalid: + if errors.Is(err, io.EOF) { + break Inner + } + b.Fatal(err) + case EntryType: + _, _ = p.Type() + continue + case EntryHelp: + _, _ = p.Help() + continue + case EntryUnit: + _, _ = p.Unit() + continue + case EntryComment: + continue + case EntryHistogram: + _, _, _, _ = p.Histogram() + case EntrySeries: + _, _, _ = p.Series() + default: + b.Fatal("not implemented entry", t) + } + + _ = p.Metric(&res) + _ = p.CreatedTimestamp() + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + } + } + } + }) + + b.Run(fmt.Sprintf("data=%v/parser=xpfmt", dataCase), func(b *testing.B) { + if bcase.compareToExpfmtFormat == expfmt.TypeUnknown { + b.Skip("compareToExpfmtFormat not set") + } + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + decSamples := make(model.Vector, 0, 50) + sdec := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(bcase.compareToExpfmtFormat)), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.TimeFromUnixNano(0), + }, + } + + for { + if err := sdec.Decode(&decSamples); err != nil { + if errors.Is(err, io.EOF) { + break + } + b.Fatal(err) + } + decSamples = decSamples[:0] + } + } + }) + } +} diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 0b5d9281e4..2682855281 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -14,6 +14,8 @@ package textparse import ( + "errors" + "fmt" "mime" "github.com/prometheus/common/model" @@ -23,8 +25,7 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Parser parses samples from a byte slice of samples in the official -// Prometheus and OpenMetrics text exposition formats. +// Parser parses samples from a byte slice of samples in different exposition formats. type Parser interface { // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. @@ -58,6 +59,8 @@ type Parser interface { // Metric writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. + // The values of the "le" labels of classic histograms and "quantile" labels + // of summaries should follow the OpenMetrics formatting rules. Metric(l *labels.Labels) string // Exemplar writes the exemplar of the current sample into the passed @@ -69,6 +72,8 @@ type Parser interface { // CreatedTimestamp returns the created timestamp (in milliseconds) for the // current sample. It returns nil if it is unknown e.g. if it wasn't set, // if the scrape protocol or metric type does not support created timestamps. + // Assume the CreatedTimestamp returned pointer is only valid until + // the Next iteration. 
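+ // Callers that need to retain the value across Next calls should copy it first.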
CreatedTimestamp() *int64 // Next advances the parser to the next sample. @@ -76,26 +81,65 @@ type Parser interface { Next() (Entry, error) } -// New returns a new parser of the byte slice. -// -// This function always returns a valid parser, but might additionally -// return an error if the content type cannot be parsed. -func New(b []byte, contentType string, parseClassicHistograms bool, st *labels.SymbolTable) (Parser, error) { +// extractMediaType returns the mediaType of a required parser. It tries first to +// extract a valid and supported mediaType from contentType. If that fails, +// the provided fallbackType (possibly an empty string) is returned, together with +// an error. fallbackType is used as-is without further validation. +func extractMediaType(contentType, fallbackType string) (string, error) { if contentType == "" { - return NewPromParser(b, st), nil + if fallbackType == "" { + return "", errors.New("non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target") + } + return fallbackType, fmt.Errorf("non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol %q", fallbackType) } + // We have a contentType, parse it. mediaType, _, err := mime.ParseMediaType(contentType) if err != nil { - return NewPromParser(b, st), err + if fallbackType == "" { + retErr := fmt.Errorf("cannot parse Content-Type %q and no fallback_scrape_protocol for target", contentType) + return "", errors.Join(retErr, err) + } + retErr := fmt.Errorf("could not parse received Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) + return fallbackType, errors.Join(retErr, err) } + + // We have a valid media type, either we recognise it and can use it + // or we have to error. + switch mediaType { + case "application/openmetrics-text", "application/vnd.google.protobuf", "text/plain": + return mediaType, nil + } + // We're here because we have no recognised mediaType. + if fallbackType == "" { + return "", fmt.Errorf("received unsupported Content-Type %q and no fallback_scrape_protocol specified for target", contentType) + } + return fallbackType, fmt.Errorf("received unsupported Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) +} + +// New returns a new parser of the byte slice. +// +// This function no longer guarantees to return a valid parser. +// +// It only returns a valid parser if the supplied contentType and fallbackType allow. +// An error may also be returned if fallbackType had to be used or there was some +// other error parsing the supplied Content-Type. +// If the returned parser is nil then the scrape must fail. +func New(b []byte, contentType, fallbackType string, parseClassicHistograms, skipOMCTSeries bool, st *labels.SymbolTable) (Parser, error) { + mediaType, err := extractMediaType(contentType, fallbackType) + // err may be nil or something we want to warn about. 
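+ // For example, Content-Type "text/html" with fallbackType "text/plain" comes back from extractMediaType above as ("text/plain", err), so we return a PromParser together with the warning error.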
+ switch mediaType { case "application/openmetrics-text": - return NewOpenMetricsParser(b, st), nil + return NewOpenMetricsParser(b, st, func(o *openMetricsParserOptions) { + o.SkipCTSeries = skipOMCTSeries + }), err case "application/vnd.google.protobuf": - return NewProtobufParser(b, parseClassicHistograms, st), nil + return NewProtobufParser(b, parseClassicHistograms, st), err + case "text/plain": + return NewPromParser(b, st), err default: - return NewPromParser(b, st), nil + return nil, err } } diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index c644565628..72c8284f2d 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -14,16 +14,28 @@ package textparse import ( + "errors" + "io" "testing" + "github.com/google/go-cmp/cmp" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" ) func TestNewParser(t *testing.T) { t.Parallel() + requireNilParser := func(t *testing.T, p Parser) { + require.Nil(t, p) + } + requirePromParser := func(t *testing.T, p Parser) { require.NotNil(t, p) _, ok := p.(*PromParser) @@ -36,34 +48,83 @@ func TestNewParser(t *testing.T) { require.True(t, ok) } + requireProtobufParser := func(t *testing.T, p Parser) { + require.NotNil(t, p) + _, ok := p.(*ProtobufParser) + require.True(t, ok) + } + for name, tt := range map[string]*struct { - contentType string - validateParser func(*testing.T, Parser) - err string + contentType string + fallbackScrapeProtocol config.ScrapeProtocol + validateParser func(*testing.T, Parser) + err string }{ "empty-string": { - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target", + }, + "empty-string-fallback-text-plain": { + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol \"text/plain\"", }, "invalid-content-type-1": { contentType: "invalid/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "expected token after slash", }, + "invalid-content-type-1-fallback-text-plain": { + contentType: "invalid/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-openmetrics": { + contentType: "invalid/", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText0_0_1, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-protobuf": { + contentType: "invalid/", + validateParser: requireProtobufParser, + fallbackScrapeProtocol: config.PrometheusProto, + err: "expected token after slash", + }, "invalid-content-type-2": { contentType: "invalid/invalid/invalid", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "unexpected content after media subtype", }, + "invalid-content-type-2-fallback-text-plain": { + contentType: "invalid/invalid/invalid", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "unexpected content after media subtype", + }, "invalid-content-type-3": { 
contentType: "/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "no media type", }, + "invalid-content-type-3-fallback-text-plain": { + contentType: "/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "no media type", + }, "invalid-content-type-4": { contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "duplicate parameter name", }, + "invalid-content-type-4-fallback-open-metrics": { + contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText1_0_0, + err: "duplicate parameter name", + }, "openmetrics": { contentType: "application/openmetrics-text", validateParser: requireOpenMetricsParser, @@ -80,27 +141,129 @@ func TestNewParser(t *testing.T) { contentType: "text/plain", validateParser: requirePromParser, }, + "protobuf": { + contentType: "application/vnd.google.protobuf", + validateParser: requireProtobufParser, + }, "plain-text-with-version": { contentType: "text/plain; version=0.0.4", validateParser: requirePromParser, }, "some-other-valid-content-type": { contentType: "text/html", - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "received unsupported Content-Type \"text/html\" and no fallback_scrape_protocol specified for target", + }, + "some-other-valid-content-type-fallback-text-plain": { + contentType: "text/html", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "received unsupported Content-Type \"text/html\", using fallback_scrape_protocol \"text/plain\"", }, } { t.Run(name, func(t *testing.T) { tt := tt // Copy to local variable before going parallel. t.Parallel() - p, err := New([]byte{}, tt.contentType, false, labels.NewSymbolTable()) + fallbackProtoMediaType := tt.fallbackScrapeProtocol.HeaderMediaType() + + p, err := New([]byte{}, tt.contentType, fallbackProtoMediaType, false, false, labels.NewSymbolTable()) tt.validateParser(t, p) if tt.err == "" { require.NoError(t, err) } else { - require.Error(t, err) - require.Contains(t, err.Error(), tt.err) + require.ErrorContains(t, err, tt.err) } }) } } + +// parsedEntry represents data that is parsed for each entry. +type parsedEntry struct { + // In all but EntryComment, EntryInvalid. + m string + + // In EntryHistogram. + shs *histogram.Histogram + fhs *histogram.FloatHistogram + + // In EntrySeries. + v float64 + + // In EntrySeries and EntryHistogram. + lset labels.Labels + t *int64 + es []exemplar.Exemplar + ct *int64 + + // In EntryType. + typ model.MetricType + // In EntryHelp. + help string + // In EntryUnit. + unit string + // In EntryComment. 
+ comment string +} + +func requireEntries(t *testing.T, exp, got []parsedEntry) { + t.Helper() + + testutil.RequireEqualWithOptions(t, exp, got, []cmp.Option{ + cmp.AllowUnexported(parsedEntry{}), + }) +} + +func testParse(t *testing.T, p Parser) (ret []parsedEntry) { + t.Helper() + + for { + et, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + + var got parsedEntry + var m []byte + switch et { + case EntryInvalid: + t.Fatal("entry invalid not expected") + case EntrySeries, EntryHistogram: + if et == EntrySeries { + m, got.t, got.v = p.Series() + got.m = string(m) + } else { + m, got.t, got.shs, got.fhs = p.Histogram() + got.m = string(m) + } + + p.Metric(&got.lset) + // Parser reuses int pointer. + if ct := p.CreatedTimestamp(); ct != nil { + got.ct = int64p(*ct) + } + for e := (exemplar.Exemplar{}); p.Exemplar(&e); { + got.es = append(got.es, e) + } + case EntryType: + m, got.typ = p.Type() + got.m = string(m) + + case EntryHelp: + m, h := p.Help() + got.m = string(m) + got.help = string(h) + + case EntryUnit: + m, u := p.Unit() + got.m = string(m) + got.unit = string(u) + + case EntryComment: + got.comment = string(p.Comment()) + } + ret = append(ret, got) + } + return ret +} diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go new file mode 100644 index 0000000000..d019c327c3 --- /dev/null +++ b/model/textparse/nhcbparse.go @@ -0,0 +1,376 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "errors" + "io" + "math" + "strconv" + "strings" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type collectionState int + +const ( + stateStart collectionState = iota + stateCollecting + stateEmitting +) + +// The NHCBParser wraps a Parser and converts classic histograms to native +// histograms with custom buckets. +// +// Since the Parser interface is line-based, this parser needs to keep track +// of the last classic histogram series it saw to collate them into a +// single native histogram. +// +// Note: +// - Only series that have the histogram metadata type are considered for +// conversion. +// - The classic series are also returned if keepClassicHistograms is true. +type NHCBParser struct { + // The parser we're wrapping. + parser Parser + // Option to keep classic histograms along with converted histograms. + keepClassicHistograms bool + + // Labels builder. + builder labels.ScratchBuilder + + // State of the parser. + state collectionState + + // Caches the values from the underlying parser. + // For Series and Histogram. + bytes []byte + ts *int64 + value float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + // For Metric. + lset labels.Labels + metricString string + // For Type. 
+ bName []byte + typ model.MetricType + + // Caches the entry itself if we are inserting a converted NHCB + // halfway through. + entry Entry + err error + + // Caches the values and metric for the inserted converted NHCB. + bytesNHCB []byte + hNHCB *histogram.Histogram + fhNHCB *histogram.FloatHistogram + lsetNHCB labels.Labels + exemplars []exemplar.Exemplar + ctNHCB *int64 + metricStringNHCB string + + // Collates values from the classic histogram series to build + // the converted histogram later. + tempLsetNHCB labels.Labels + tempNHCB convertnhcb.TempHistogram + tempExemplars []exemplar.Exemplar + tempExemplarCount int + tempCT *int64 + + // Remembers the last base histogram metric name (assuming it's + // a classic histogram) so we can tell if the next float series + // is part of the same classic histogram. + lastHistogramName string + lastHistogramLabelsHash uint64 + lastHistogramExponential bool + // Reused buffer for hashing labels. + hBuffer []byte +} + +func NewNHCBParser(p Parser, st *labels.SymbolTable, keepClassicHistograms bool) Parser { + return &NHCBParser{ + parser: p, + keepClassicHistograms: keepClassicHistograms, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + tempNHCB: convertnhcb.NewTempHistogram(), + } +} + +func (p *NHCBParser) Series() ([]byte, *int64, float64) { + return p.bytes, p.ts, p.value +} + +func (p *NHCBParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.state == stateEmitting { + return p.bytesNHCB, p.ts, p.hNHCB, p.fhNHCB + } + return p.bytes, p.ts, p.h, p.fh +} + +func (p *NHCBParser) Help() ([]byte, []byte) { + return p.parser.Help() +} + +func (p *NHCBParser) Type() ([]byte, model.MetricType) { + return p.bName, p.typ +} + +func (p *NHCBParser) Unit() ([]byte, []byte) { + return p.parser.Unit() +} + +func (p *NHCBParser) Comment() []byte { + return p.parser.Comment() +} + +func (p *NHCBParser) Metric(l *labels.Labels) string { + if p.state == stateEmitting { + *l = p.lsetNHCB + return p.metricStringNHCB + } + *l = p.lset + return p.metricString +} + +func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { + if p.state == stateEmitting { + if len(p.exemplars) == 0 { + return false + } + *ex = p.exemplars[0] + p.exemplars = p.exemplars[1:] + return true + } + return p.parser.Exemplar(ex) +} + +func (p *NHCBParser) CreatedTimestamp() *int64 { + switch p.state { + case stateStart: + if p.entry == EntrySeries || p.entry == EntryHistogram { + return p.parser.CreatedTimestamp() + } + case stateCollecting: + return p.tempCT + case stateEmitting: + return p.ctNHCB + } + return nil +} + +func (p *NHCBParser) Next() (Entry, error) { + if p.state == stateEmitting { + p.state = stateStart + if p.entry == EntrySeries { + isNHCB := p.handleClassicHistogramSeries(p.lset) + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + } + return p.entry, p.err + } + + p.entry, p.err = p.parser.Next() + if p.err != nil { + if errors.Is(p.err, io.EOF) && p.processNHCB() { + return EntryHistogram, nil + } + return EntryInvalid, p.err + } + switch p.entry { + case EntrySeries: + p.bytes, p.ts, p.value = p.parser.Series() + p.metricString = p.parser.Metric(&p.lset) + // Check the label set to see if we can continue or need to emit the NHCB. + var isNHCB bool + if p.compareLabels() { + // Labels differ. Check if we can emit the NHCB. 
+ if p.processNHCB() { + return EntryHistogram, nil + } + isNHCB = p.handleClassicHistogramSeries(p.lset) + } else { + // Labels are the same. Check whether we are right after an exponential histogram. + if p.lastHistogramExponential { + isNHCB = false + } else { + isNHCB = p.handleClassicHistogramSeries(p.lset) + } + } + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + return p.entry, p.err + case EntryHistogram: + p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() + p.metricString = p.parser.Metric(&p.lset) + p.storeExponentialLabels() + case EntryType: + p.bName, p.typ = p.parser.Type() + } + if p.processNHCB() { + return EntryHistogram, nil + } + return p.entry, p.err +} + +// Return true if labels have changed and we should emit the NHCB. +func (p *NHCBParser) compareLabels() bool { + if p.state != stateCollecting { + return false + } + if p.typ != model.MetricTypeHistogram { + // Different metric type. + return true + } + if p.lastHistogramName != convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) { + // Different metric name. + return true + } + nextHash, _ := p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + // Different label values. + return p.lastHistogramLabelsHash != nextHash +} + +// Save the label set of the classic histogram without suffix and bucket `le` label. +func (p *NHCBParser) storeClassicLabels() { + p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + p.lastHistogramExponential = false +} + +func (p *NHCBParser) storeExponentialLabels() { + p.lastHistogramName = p.lset.Get(labels.MetricName) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer) + p.lastHistogramExponential = true +} + +// handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB +// if it is actually a classic histogram series (and not a normal float series) and if there +// isn't already a native histogram with the same name (assuming it is always processed +// right before the classic histograms). It returns true if the collation was done. +func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { + if p.typ != model.MetricTypeHistogram { + return false + } + mName := lset.Get(labels.MetricName) + // Sanity check to ensure that the TYPE metadata entry name is the same as the base name. 
+ if convertnhcb.GetHistogramMetricBaseName(mName) != string(p.bName) { + return false + } + switch { + case strings.HasSuffix(mName, "_bucket") && lset.Has(labels.BucketLabel): + le, err := strconv.ParseFloat(lset.Get(labels.BucketLabel), 64) + if err == nil && !math.IsNaN(le) { + p.processClassicHistogramSeries(lset, "_bucket", func(hist *convertnhcb.TempHistogram) { + hist.BucketCounts[le] = p.value + }) + return true + } + case strings.HasSuffix(mName, "_count"): + p.processClassicHistogramSeries(lset, "_count", func(hist *convertnhcb.TempHistogram) { + hist.Count = p.value + }) + return true + case strings.HasSuffix(mName, "_sum"): + p.processClassicHistogramSeries(lset, "_sum", func(hist *convertnhcb.TempHistogram) { + hist.Sum = p.value + }) + return true + } + return false +} + +func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { + if p.state != stateCollecting { + p.storeClassicLabels() + p.tempCT = p.parser.CreatedTimestamp() + p.state = stateCollecting + } + p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) + p.storeExemplars() + updateHist(&p.tempNHCB) +} + +func (p *NHCBParser) storeExemplars() { + for ex := p.nextExemplarPtr(); p.parser.Exemplar(ex); ex = p.nextExemplarPtr() { + p.tempExemplarCount++ + } +} + +func (p *NHCBParser) nextExemplarPtr() *exemplar.Exemplar { + switch { + case p.tempExemplarCount == len(p.tempExemplars)-1: + // Reuse the previously allocated exemplar; it was not filled up. + case len(p.tempExemplars) == cap(p.tempExemplars): + // Let the runtime grow the slice. + p.tempExemplars = append(p.tempExemplars, exemplar.Exemplar{}) + default: + // Take the next element into use. + p.tempExemplars = p.tempExemplars[:len(p.tempExemplars)+1] + } + return &p.tempExemplars[len(p.tempExemplars)-1] +} + +func (p *NHCBParser) swapExemplars() { + p.exemplars = p.tempExemplars[:p.tempExemplarCount] + p.tempExemplars = p.tempExemplars[:0] + p.tempExemplarCount = 0 +} + +// processNHCB converts the collated classic histogram series to NHCB and caches the info +// to be returned to callers. Returns true if the conversion was successful. 
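+// It gathers the collected bucket upper bounds, builds and validates the custom-bucket
+// histogram, and stages it so that the subsequent Histogram(), Metric() and
+// CreatedTimestamp() calls return the converted data.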
+func (p *NHCBParser) processNHCB() bool { + if p.state != stateCollecting { + return false + } + ub := make([]float64, 0, len(p.tempNHCB.BucketCounts)) + for b := range p.tempNHCB.BucketCounts { + ub = append(ub, b) + } + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub, false) + fhBase := hBase.ToFloat(nil) + h, fh := convertnhcb.NewHistogram(p.tempNHCB, upperBounds, hBase, fhBase) + if h != nil { + if err := h.Validate(); err != nil { + return false + } + p.hNHCB = h + p.fhNHCB = nil + } else if fh != nil { + if err := fh.Validate(); err != nil { + return false + } + p.hNHCB = nil + p.fhNHCB = fh + } + p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + strings.ReplaceAll(p.tempLsetNHCB.DropMetricName().String(), ", ", ",") + p.bytesNHCB = []byte(p.metricStringNHCB) + p.lsetNHCB = p.tempLsetNHCB + p.swapExemplars() + p.ctNHCB = p.tempCT + p.tempNHCB = convertnhcb.NewTempHistogram() + p.state = stateEmitting + p.tempCT = nil + return true +} diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go new file mode 100644 index 0000000000..6152a85038 --- /dev/null +++ b/model/textparse/nhcbparse_test.go @@ -0,0 +1,939 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "encoding/binary" + "strconv" + "testing" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" +) + +func TestNHCBParserOnOMParser(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +# UNIT go_gc_duration_seconds seconds +go_gc_duration_seconds{quantile="0"} 4.9351e-05 +go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05 +go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +# HELP nohelp1 +# HELP help2 escape \ \n \\ \" \x chars +# UNIT nounit +go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05 +go_gc_duration_seconds_count 99 +some:aggregate:rate5m{a_b="c"} 1 +# HELP go_goroutines Number of goroutines that currently exist. 
+# TYPE go_goroutines gauge +go_goroutines 33 123.123 +# TYPE hh histogram +hh_bucket{le="+Inf"} 1 +# TYPE gh gaugehistogram +gh_bucket{le="+Inf"} 1 +# TYPE hhh histogram +hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4 +hhh_count 1 # {id="histogram-count-test"} 4 +# TYPE ggh gaugehistogram +ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123 +ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123 +# TYPE smr_seconds summary +smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321 +smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321 +# TYPE ii info +ii{foo="bar"} 1 +# TYPE ss stateset +ss{ss="foo"} 1 +ss{ss="bar"} 0 +ss{A="a"} 0 +# TYPE un unknown +_metric_starting_with_underscore 1 +testmetric{_label_starting_with_underscore="foo"} 1 +testmetric{label="\"bar\""} 1 +# HELP foo Counter with and without labels to certify CT is parsed for both cases +# TYPE foo counter +foo_total 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created{a="b"} 1520872607.123 +# HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far +# TYPE bar summary +bar_count 17.0 +bar_sum 324789.3 +bar{quantile="0.95"} 123.7 +bar{quantile="0.99"} 150.0 +bar_created 1520872608.124 +# HELP baz Histogram with the same objective as above's summary +# TYPE baz histogram +baz_bucket{le="0.0"} 0 +baz_bucket{le="+Inf"} 17 +baz_count 17 +baz_sum 324789.3 +baz_created 1520872609.125 +# HELP fizz_created Gauge which shouldn't be parsed as CT +# TYPE fizz_created gauge +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="+Inf"} 18 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 +something_bucket{a="b",le="+Inf"} 9 +something_created{a="b"} 1520430002 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` + + input += "\n# HELP metric foo\x00bar" + input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# EOF\n" + + exp := []parsedEntry{ + { + m: "go_gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go_gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: "go_gc_duration_seconds", + unit: "seconds", + }, { + m: `go_gc_duration_seconds{quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), + }, { + m: `go_gc_duration_seconds{quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"), + }, { + m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: "nohelp1", + help: "", + }, { + m: "help2", + help: "escape \\ \n \\ \" \\x chars", + }, { + m: "nounit", + unit: "", + }, { + m: `go_gc_duration_seconds{quantile="1.0",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", 
"go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds_count`, + v: 99, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"), + }, { + m: `some:aggregate:rate5m{a_b="c"}`, + v: 1, + lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"), + }, { + m: "go_goroutines", + help: "Number of goroutines that currently exist.", + }, { + m: "go_goroutines", + typ: model.MetricTypeGauge, + }, { + m: `go_goroutines`, + v: 33, + t: int64p(123123), + lset: labels.FromStrings("__name__", "go_goroutines"), + }, { + m: "hh", + typ: model.MetricTypeHistogram, + }, { + m: `hh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hh"), + }, { + m: "gh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `gh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), + }, { + m: "hhh", + typ: model.MetricTypeHistogram, + }, { + m: `hhh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hhh"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, + }, { + m: "ggh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `ggh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: `ggh_count`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: "smr_seconds", + typ: model.MetricTypeSummary, + }, { + m: `smr_seconds_count`, + v: 2, + lset: labels.FromStrings("__name__", "smr_seconds_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: `smr_seconds_sum`, + v: 42, + lset: labels.FromStrings("__name__", "smr_seconds_sum"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: "ii", + typ: model.MetricTypeInfo, + }, { + m: `ii{foo="bar"}`, + v: 1, + lset: labels.FromStrings("__name__", "ii", "foo", "bar"), + }, { + m: "ss", + typ: model.MetricTypeStateset, + }, { + m: `ss{ss="foo"}`, + v: 1, + lset: labels.FromStrings("__name__", "ss", "ss", "foo"), + }, { + m: `ss{ss="bar"}`, + v: 0, + lset: labels.FromStrings("__name__", "ss", "ss", "bar"), + }, { + m: `ss{A="a"}`, + v: 0, + lset: labels.FromStrings("A", "a", "__name__", "ss"), + }, { + m: "un", + typ: model.MetricTypeUnknown, + }, { + m: "_metric_starting_with_underscore", + v: 1, + lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"), + }, { + m: "testmetric{_label_starting_with_underscore=\"foo\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"), + }, { + m: 
"testmetric{label=\"\\\"bar\\\"\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: "foo", + help: "Counter with and without labels to certify CT is parsed for both cases", + }, { + m: "foo", + typ: model.MetricTypeCounter, + }, { + m: "foo_total", + v: 17, + lset: labels.FromStrings("__name__", "foo_total"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: `foo_total{a="b"}`, + v: 17.0, + lset: labels.FromStrings("__name__", "foo_total", "a", "b"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: "bar", + help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", + }, { + m: "bar", + typ: model.MetricTypeSummary, + }, { + m: "bar_count", + v: 17.0, + lset: labels.FromStrings("__name__", "bar_count"), + ct: int64p(1520872608124), + }, { + m: "bar_sum", + v: 324789.3, + lset: labels.FromStrings("__name__", "bar_sum"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), + ct: int64p(1520872608124), + }, { + m: "baz", + help: "Histogram with the same objective as above's summary", + }, { + m: "baz", + typ: model.MetricTypeHistogram, + }, { + m: `baz{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 17, + Sum: 324789.3, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 1}}, // The first bucket has 0 count so we don't store it and Offset is 1. + PositiveBuckets: []int64{17}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "baz"), + ct: int64p(1520872609125), + }, { + m: "fizz_created", + help: "Gauge which shouldn't be parsed as CT", + }, { + m: "fizz_created", + typ: model.MetricTypeGauge, + }, { + m: `fizz_created`, + v: 17, + lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 16}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + ct: int64p(1520430001000), + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. 
+ }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), + }, { + m: "metric", + help: "foo\x00bar", + }, { + m: "null_byte_metric{a=\"abc\x00\"}", + v: 1, + lset: labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +func TestNHCBParserOMParser_MultipleHistograms(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_bucket{le="0.0"} 1 # {id="something-test"} -2.0 +something_bucket{le="1.0"} 16 # {id="something-test"} 0.5 +something_bucket{le="+Inf"} 18 # {id="something-test"} 8 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 # {id="something-test"} 0.0 123.321 +something_bucket{a="b",le="1.0"} 8 +something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 +# EOF +` + + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{1, 14, -13}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. 
+ }, + lset: labels.FromStrings("__name__", "something"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: -2.0}, + {Labels: labels.FromStrings("id", "something-test"), Value: 0.5}, + {Labels: labels.FromStrings("id", "something-test"), Value: 8.0}, + }, + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}, {Offset: 1, Length: 1}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321}, + {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000}, + }, + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +// Verify the requirement tables from +// https://github.com/prometheus/prometheus/issues/13532 . +// "classic" means the option "always_scrape_classic_histograms". +// "nhcb" means the option "convert_classic_histograms_to_nhcb". +// +// Case 1. Only classic histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | YES | NO | NO |. +// | classic=true, nhcb=false | YES | NO | NO |. +// | classic=false, nhcb=true | NO | NO | YES |. +// | classic=true, nhcb=true | YES | NO | YES |. +// +// Case 2. Both classic and exponential histograms are exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | YES | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | YES | YES | NO |. +// +// Case 3. Only exponential histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | NO | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | NO | YES | NO |. +func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) { + type requirement struct { + expectClassic bool + expectExponential bool + expectNHCB bool + } + + cases := []map[string]requirement{ + // Case 1. + { + "classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: false, expectNHCB: true}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: false, expectNHCB: true}, + }, + // Case 2. + { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: true, expectNHCB: false}, + }, + // Case 3. 
+ { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + }, + } + + // Create parser from keep classic option. + type parserFactory func(bool) Parser + + type testCase struct { + name string + parser parserFactory + classic bool + nhcb bool + exp []parsedEntry + } + + type parserOptions struct { + useUTF8sep bool + hasCreatedTimeStamp bool + } + // Defines the parser name, the Parser factory and the test cases + // supported by the parser and parser options. + parsers := []func() (string, parserFactory, []int, parserOptions){ + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + inputBuf := createTestProtoBufHistogram(t) + return NewProtobufParser(inputBuf.Bytes(), keepClassic, labels.NewSymbolTable()) + } + return "ProtoBuf", factory, []int{1, 2, 3}, parserOptions{useUTF8sep: true, hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestOpenMetricsHistogram() + return NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + } + return "OpenMetrics", factory, []int{1}, parserOptions{hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestPromHistogram() + return NewPromParser([]byte(input), labels.NewSymbolTable()) + } + return "Prometheus", factory, []int{1}, parserOptions{} + }, + } + + testCases := []testCase{} + for _, parser := range parsers { + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + parserName, parser, supportedCases, options := parser() + requirementName := "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb) + tc := testCase{ + name: "parser=" + parserName + ", " + requirementName, + parser: parser, + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for _, caseNumber := range supportedCases { + caseI := cases[caseNumber-1] + req, ok := caseI[requirementName] + require.True(t, ok, "Case %d does not have requirement %s", caseNumber, requirementName) + metric := "test_histogram" + strconv.Itoa(caseNumber) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(caseNumber), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + + var ct *int64 + if options.hasCreatedTimeStamp { + ct = int64p(1000) + } + + var bucketForMetric func(string) string + if options.useUTF8sep { + bucketForMetric = func(s string) string { + return "_bucket\xffle\xff" + s + } + } else { + bucketForMetric = func(s string) string { + return "_bucket{le=\"" + s + "\"}" + } + } + + if req.expectExponential { + // Always expect exponential histogram first. 
+ exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, exponentialSeries...) + } + if req.expectClassic { + // Always expect classic histogram series after exponential. + classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0004899999999999998"), + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0003899999999999998"), + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0002899999999999998"), + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("+Inf"), + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. + nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, nhcbSeries...) 
+ } + } + testCases = append(testCases, tc) + } + } + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := tc.parser(tc.classic) + if tc.nhcb { + p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) + } + got := testParse(t, p) + requireEntries(t, tc.exp, got) + }) + } +} + +func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { + testMetricFamilies := []string{`name: "test_histogram1" +help: "Test histogram 1" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + > + timestamp_ms: 1234568 +>`, `name: "test_histogram2" +help: "Test histogram 2" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +>`, `name: "test_histogram3" +help: "Test histogram 3" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> +`} + + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := &bytes.Buffer{} + + for _, tmf := range testMetricFamilies { + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(tmf, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. 
+ varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + return buf +} + +func createTestOpenMetricsHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234.568 +test_histogram1_sum 0.0008280461746287094 1234.568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234.568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234.568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234.568 +test_histogram1_bucket{le="+Inf"} 175 1234.568 +test_histogram1_created 1 +# EOF` +} + +func createTestPromHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234568 +test_histogram1_sum 0.0008280461746287094 1234768 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 +test_histogram1_bucket{le="+Inf"} 175 1234568` +} diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 5f0415d3ee..3ae9c7ddfc 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -17,13 +17,16 @@ package textparse import ( + "bytes" "errors" "fmt" "io" "math" + "strconv" "strings" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -72,15 +75,16 @@ func (l *openMetricsLexer) Error(es string) { // OpenMetrics text exposition format. // This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit type OpenMetricsParser struct { - l *openMetricsLexer - builder labels.ScratchBuilder - series []byte - text []byte - mtype model.MetricType - val float64 - ts int64 - hasTS bool - start int + l *openMetricsLexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. + text []byte + mtype model.MetricType + val float64 + ts int64 + hasTS bool + start int // offsets is a list of offsets into series that describe the positions // of the metric name and label names and values for this series. // p.offsets[0] is the start character of the metric name. @@ -95,7 +99,15 @@ type OpenMetricsParser struct { exemplarTs int64 hasExemplarTs bool - skipCTSeries bool + // Created timestamp parsing state. + ct int64 + ctHashSet uint64 + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool + // visitedMFName is the metric family name of the last visited metric when peeking ahead + // for _created series during the execution of the CreatedTimestamp method. + visitedMFName []byte + skipCTSeries bool } type openMetricsParserOptions struct { @@ -201,7 +213,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) p.builder.Add(label, value) } @@ -252,87 +264,144 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { // CreatedTimestamp returns the created timestamp for a current Metric if exists or nil. 
// NOTE(Maniktherana): Might use additional CPU/mem resources due to deep copy of parser required for peeking given 1.0 OM specification on _created series. func (p *OpenMetricsParser) CreatedTimestamp() *int64 { - if !TypeRequiresCT(p.mtype) { + if !typeRequiresCT(p.mtype) { // Not a CT supported metric type, fast path. + p.ctHashSet = 0 // Use ctHashSet as the single way of telling "empty cache". return nil } var ( - currLset labels.Labels - buf []byte - peekWithoutNameLsetHash uint64 + buf []byte + currName []byte ) - p.Metric(&currLset) - currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - // Search for the _created line for the currFamilyLsetHash using ephemeral parser until - // we see EOF or new metric family. We have to do it as we don't know where (and if) - // that CT line is. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - peek := deepCopy(p) - for { - eType, err := peek.Next() - if err != nil { - // This means peek will give error too later on, so def no CT line found. - // This might result in partial scrape with wrong/missing CT, but only - // spec improvement would help. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - return nil - } - if eType != EntrySeries { - // Assume we hit different family, no CT line found. - return nil - } - - var peekedLset labels.Labels - peek.Metric(&peekedLset) - peekedName := peekedLset.Get(model.MetricNameLabel) - if !strings.HasSuffix(peekedName, "_created") { - // Not a CT line, search more. - continue - } - - // We got a CT line here, but let's search if CT line is actually for our series, edge case. - peekWithoutNameLsetHash, _ = peekedLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - if peekWithoutNameLsetHash != currFamilyLsetHash { - // CT line for a different series, for our series no CT. - return nil - } - ct := int64(peek.val) - return &ct + if len(p.series) > 1 && p.series[0] == '{' && p.series[1] == '"' { + // Special case for UTF-8 encoded metric family names. + currName = p.series[p.offsets[0]-p.start : p.mfNameLen+2] + } else { + currName = p.series[p.offsets[0]-p.start : p.mfNameLen] } -} -// TypeRequiresCT returns true if the metric type requires a _created timestamp. -func TypeRequiresCT(t model.MetricType) bool { - switch t { - case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: - return true - default: - return false + currHash := p.seriesHash(&buf, currName) + // Check cache, perhaps we fetched something already. + if currHash == p.ctHashSet && p.ct > 0 { + return &p.ct } -} -// deepCopy creates a copy of a parser without re-using the slices' original memory addresses. -func deepCopy(p *OpenMetricsParser) OpenMetricsParser { - newB := make([]byte, len(p.l.b)) - copy(newB, p.l.b) - - newLexer := &openMetricsLexer{ - b: newB, + // Create a new lexer to reset the parser once this function is done executing. + resetLexer := &openMetricsLexer{ + b: p.l.b, i: p.l.i, start: p.l.start, err: p.l.err, state: p.l.state, } - newParser := OpenMetricsParser{ - l: newLexer, - builder: p.builder, - mtype: p.mtype, - val: p.val, - skipCTSeries: false, + p.skipCTSeries = false + + p.ignoreExemplar = true + savedStart := p.start + defer func() { + p.ignoreExemplar = false + p.start = savedStart + p.l = resetLexer + }() + + for { + eType, err := p.Next() + if err != nil { + // This means p.Next() will return an error later on too, so definitely no CT line found.
+ // This might result in a partial scrape with a wrong/missing CT, but only + // a spec improvement would help. + // TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. + p.resetCTParseValues() + return nil + } + if eType != EntrySeries { + // Assume we hit a different family, no CT line found. + p.resetCTParseValues() + return nil + } + + peekedName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + if len(peekedName) < 8 || string(peekedName[len(peekedName)-8:]) != "_created" { + // Not a CT line, search more. + continue + } + + // Remove _created suffix. + peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) + if peekedHash != currHash { + // Found a CT line for a different series; there is no CT for our series. + p.resetCTParseValues() + return nil + } + + // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. + // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps + ct := int64(p.val * 1000.0) + p.setCTParseValues(ct, currHash, currName, true) + return &ct + } +} + +var ( + leBytes = []byte{108, 101} + quantileBytes = []byte{113, 117, 97, 110, 116, 105, 108, 101} +) + +// seriesHash generates a hash based on the metric family name and the offsets +// of label names and values from the parsed OpenMetrics data. It skips quantile +// and le labels for summaries and histograms respectively. +func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []byte) uint64 { + // Iterate through p.offsets to find the label names and values. + for i := 2; i < len(p.offsets); i += 4 { + lStart := p.offsets[i] - p.start + lEnd := p.offsets[i+1] - p.start + label := p.series[lStart:lEnd] + // Skip quantile and le labels for summaries and histograms. + if p.mtype == model.MetricTypeSummary && bytes.Equal(label, quantileBytes) { + continue + } + if p.mtype == model.MetricTypeHistogram && bytes.Equal(label, leBytes) { + continue + } + *offsetsArr = append(*offsetsArr, p.series[lStart:lEnd]...) + vStart := p.offsets[i+2] - p.start + vEnd := p.offsets[i+3] - p.start + *offsetsArr = append(*offsetsArr, p.series[vStart:vEnd]...) + } + + *offsetsArr = append(*offsetsArr, metricFamilyName...) + hashedOffsets := xxhash.Sum64(*offsetsArr) + + // Reset the offsets array for later reuse. + *offsetsArr = (*offsetsArr)[:0] + return hashedOffsets +} + +// setCTParseValues sets the parser to the state after the CreatedTimestamp method was called and a CT was found. +// This is useful to prevent re-parsing the same series again and to return the cached CT value early. +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool) { + p.ct = ct + p.ctHashSet = ctHashSet + p.visitedMFName = mfName + p.skipCTSeries = skipCTSeries // Do we need to set it? +} + +// resetCTParseValues resets the parser to the state before the CreatedTimestamp method was called. +func (p *OpenMetricsParser) resetCTParseValues() { + p.ctHashSet = 0 + p.skipCTSeries = true +} + +// typeRequiresCT returns true if the metric type requires a _created timestamp. +func typeRequiresCT(t model.MetricType) bool { + switch t { + case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: + return true + default: + return false } - return newParser } // nextToken returns the next token from the openMetricsLexer.
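To make the hash-and-cache logic above easier to follow outside the parser internals, here is a minimal, self-contained sketch of the two ideas the new CreatedTimestamp path relies on: hashing a series identity while skipping "le"/"quantile" so that a series and its _created line collide on the same key, and converting OpenMetrics epoch-seconds to milliseconds. identityHash and the label struct are illustrative stand-ins for seriesHash and the parser's offset-based label access, not the real API; only the xxhash import is the one the patch itself adds.

package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

type label struct{ name, value string }

// identityHash mirrors the idea behind seriesHash: fold the ordered label
// name/value bytes into one buffer, skipping "le" and "quantile" (a _created
// line never carries them), then append the metric family name and hash once.
func identityHash(mfName string, lbls []label) uint64 {
	buf := make([]byte, 0, 64)
	for _, l := range lbls {
		if l.name == "le" || l.name == "quantile" {
			continue
		}
		buf = append(buf, l.name...)
		buf = append(buf, l.value...)
	}
	buf = append(buf, mfName...)
	return xxhash.Sum64(buf)
}

func main() {
	// foo_bucket{a="b",le="0.5"} and foo_created{a="b"} land on the same key,
	// which is what lets the parser cache one CT per metric family + labels.
	h1 := identityHash("foo", []label{{"a", "b"}, {"le", "0.5"}})
	h2 := identityHash("foo", []label{{"a", "b"}})
	fmt.Println(h1 == h2) // true

	// OpenMetrics timestamps are Unix epoch seconds (possibly fractional);
	// Prometheus stores milliseconds, hence int64(p.val * 1000.0) above.
	v := 1520872607.123
	fmt.Println(int64(v * 1000.0)) // 1520872607123
}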
@@ -356,10 +425,12 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] - p.eOffsets = p.eOffsets[:0] - p.exemplar = p.exemplar[:0] - p.exemplarVal = 0 - p.hasExemplarTs = false + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } switch t := p.nextToken(); t { case tEOFWord: @@ -378,6 +449,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { mStart++ mEnd-- } + p.mfNameLen = mEnd - mStart p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) @@ -483,6 +555,16 @@ func (p *OpenMetricsParser) parseComment() error { var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + // Parse the labels. p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { @@ -591,10 +673,9 @@ func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, e // isCreatedSeries returns true if the current series is a _created series. func (p *OpenMetricsParser) isCreatedSeries() bool { - var newLbs labels.Labels - p.Metric(&newLbs) - name := newLbs.Get(model.MetricNameLabel) - if TypeRequiresCT(p.mtype) && strings.HasSuffix(name, "_created") { + metricName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + // Check the length so the metric name is at least len("_created") long. + if typeRequiresCT(p.mtype) && len(metricName) >= 8 && string(metricName[len(metricName)-8:]) == "_created" { return true } return false @@ -663,3 +744,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error } return val, nil } + +// normalizeFloatsInLabelValues ensures that values of the "le" labels of classic histograms and "quantile" labels +// of summaries follow OpenMetrics formatting rules.
+func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string { + if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { + f, err := strconv.ParseFloat(v, 64) + if err == nil { + return formatOpenMetricsFloat(f) + } + } + return v +} diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index cadaabc99f..9c3c679ab5 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -14,7 +14,7 @@ package textparse import ( - "errors" + "fmt" "io" "testing" @@ -69,32 +69,57 @@ testmetric{label="\"bar\""} 1 # HELP foo Counter with and without labels to certify CT is parsed for both cases # TYPE foo counter foo_total 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created 1000 +foo_created 1520872607.123 foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created{a="b"} 1000 +foo_created{a="b"} 1520872607.123 +foo_total{le="c"} 21.0 +foo_created{le="c"} 1520872621.123 +foo_total{le="1"} 10.0 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 bar_sum 324789.3 bar{quantile="0.95"} 123.7 bar{quantile="0.99"} 150.0 -bar_created 1520430000 +bar_created 1520872608.124 # HELP baz Histogram with the same objective as above's summary # TYPE baz histogram baz_bucket{le="0.0"} 0 baz_bucket{le="+Inf"} 17 baz_count 17 baz_sum 324789.3 -baz_created 1520430000 +baz_created 1520872609.125 # HELP fizz_created Gauge which shouldn't be parsed as CT # TYPE fizz_created gauge -fizz_created 17.0` +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="1"} 2 +something_bucket{le="+Inf"} 18 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" input += "\n# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -107,7 +132,7 @@ fizz_created 17.0` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, @@ -169,12 +194,16 @@ fizz_created 17.0` m: `hhh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "hhh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + }, }, { m: `hhh_count`, v: 1, lset: labels.FromStrings("__name__", "hhh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, }, { m: "ggh", typ: 
model.MetricTypeGaugeHistogram, @@ -182,12 +211,16 @@ fizz_created 17.0` m: `ggh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: `ggh_count`, v: 1, lset: labels.FromStrings("__name__", "ggh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: "smr_seconds", typ: model.MetricTypeSummary, @@ -195,12 +228,16 @@ fizz_created 17.0` m: `smr_seconds_count`, v: 2, lset: labels.FromStrings("__name__", "smr_seconds_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: `smr_seconds_sum`, v: 42, lset: labels.FromStrings("__name__", "smr_seconds_sum"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: "ii", typ: model.MetricTypeInfo, @@ -249,15 +286,28 @@ fizz_created 17.0` v: 17, lset: labels.FromStrings("__name__", "foo_total"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), }, { m: `foo_total{a="b"}`, v: 17.0, lset: labels.FromStrings("__name__", "foo_total", "a", "b"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), + }, { + m: `foo_total{le="c"}`, + v: 21.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "c"), + ct: int64p(1520872621123), + }, { + m: `foo_total{le="1"}`, + v: 10.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "1"), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -268,22 +318,22 @@ fizz_created 17.0` m: "bar_count", v: 17.0, lset: labels.FromStrings("__name__", "bar_count"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "bar_sum", v: 324789.3, lset: labels.FromStrings("__name__", "bar_sum"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.95"}`, v: 123.7, lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.99"}`, v: 150.0, lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "baz", help: "Histogram with the same objective as above's summary", @@ -294,22 +344,22 @@ fizz_created 17.0` m: `baz_bucket{le="0.0"}`, v: 0, lset: labels.FromStrings("__name__", "baz_bucket", "le", "0.0"), - ct: int64p(1520430000), + ct: 
int64p(1520872609125), }, { m: `baz_bucket{le="+Inf"}`, v: 17, lset: labels.FromStrings("__name__", "baz_bucket", "le", "+Inf"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_count`, v: 17, lset: labels.FromStrings("__name__", "baz_count"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_sum`, v: 324789.3, lset: labels.FromStrings("__name__", "baz_sum"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: "fizz_created", help: "Gauge which shouldn't be parsed as CT", @@ -320,6 +370,89 @@ fizz_created 17.0` m: `fizz_created`, v: 17, lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something_count`, + v: 18, + lset: labels.FromStrings("__name__", "something_count"), + ct: int64p(1520430001000), + }, { + m: `something_sum`, + v: 324789.4, + lset: labels.FromStrings("__name__", "something_sum"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="0.0"}`, + v: 1, + lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="1"}`, + v: 2, + lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="+Inf"}`, + v: 18, + lset: labels.FromStrings("__name__", "something_bucket", "le", "+Inf"), + ct: int64p(1520430001000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), }, { m: "metric", help: "foo\x00bar", @@ -331,7 +464,8 @@ fizz_created 17.0` } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8OpenMetricsParse(t *testing.T) { @@ -346,7 +480,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { # UNIT "go.gc_duration_seconds" seconds {"go.gc_duration_seconds",quantile="0"} 4.9351e-05 {"go.gc_duration_seconds",quantile="0.25"} 7.424100000000001e-05 -{"go.gc_duration_seconds_created"} 12313 +{"go.gc_duration_seconds_created"} 1520872607.123 {"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05 
{"http.status",q="0.9",a="b"} 8.3835e-05 {"http.status",q="0.9",a="b"} 8.3835e-05 @@ -356,7 +490,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { input += "\n# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -369,13 +503,13 @@ func TestUTF8OpenMetricsParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), - ct: int64p(12313), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.25"}`, v: 7.424100000000001e-05, lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"), - ct: int64p(12313), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`, v: 8.3835e-05, @@ -405,7 +539,8 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"), } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestOpenMetricsParseErrors(t *testing.T) { @@ -699,7 +834,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() @@ -764,231 +899,121 @@ func TestOMNullByteHandling(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() } if c.err == "" { - require.Equal(t, io.EOF, err, "test %d", i) + require.ErrorIs(t, err, io.EOF, "test %d", i) continue } - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } -// While not desirable, there are cases were CT fails to parse and -// these tests show them. +// TestCTParseFailures tests known failure edge cases, we know does not work due +// current OM spec limitations or clients with broken OM format. // TODO(maniktherana): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. 
func TestCTParseFailures(t *testing.T) { - input := `# HELP something Histogram with _created between buckets and summary -# TYPE something histogram -something_count 17 -something_sum 324789.3 -something_created 1520430001 -something_bucket{le="0.0"} 0 -something_bucket{le="+Inf"} 17 -# HELP thing Histogram with _created as first line + for _, tcase := range []struct { + name string + input string + expected []parsedEntry + }{ + { + name: "_created line is a first one", + input: `# HELP thing histogram with _created as first line # TYPE thing histogram -thing_created 1520430002 +thing_created 1520872607.123 thing_count 17 thing_sum 324789.3 thing_bucket{le="0.0"} 0 thing_bucket{le="+Inf"} 17 -# HELP yum Summary with _created between sum and quantiles -# TYPE yum summary -yum_count 17.0 -yum_sum 324789.3 -yum_created 1520430003 -yum{quantile="0.95"} 123.7 -yum{quantile="0.99"} 150.0 -# HELP foobar Summary with _created as the first line -# TYPE foobar summary -foobar_created 1520430004 -foobar_count 17.0 -foobar_sum 324789.3 -foobar{quantile="0.95"} 123.7 -foobar{quantile="0.99"} 150.0` - - input += "\n# EOF\n" - - int64p := func(x int64) *int64 { return &x } - - type expectCT struct { - m string - ct *int64 - typ model.MetricType - help string - isErr bool - } - - exp := []expectCT{ - { - m: "something", - help: "Histogram with _created between buckets and summary", - isErr: false, - }, { - m: "something", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `something_count`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_sum`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_bucket{le="0.0"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: `something_bucket{le="+Inf"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: "thing", - help: "Histogram with _created as first line", - isErr: false, - }, { - m: "thing", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `thing_count`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_sum`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="0.0"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="+Inf"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: "yum", - help: "Summary with _created between summary and quantiles", - isErr: false, - }, { - m: "yum", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "yum_count", - ct: int64p(1520430003), - isErr: false, - }, { - m: "yum_sum", - ct: int64p(1520430003), - isErr: false, - }, { - m: `yum{quantile="0.95"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: `yum{quantile="0.99"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: "foobar", - help: "Summary with _created as the first line", - isErr: false, - }, { - m: "foobar", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "foobar_count", - ct: int64p(1520430004), - isErr: true, - }, { - m: "foobar_sum", - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.95"}`, - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.99"}`, - ct: int64p(1520430004), - isErr: true, +# HELP thing_c counter with _created as first line +# TYPE thing_c counter +thing_c_created 1520872607.123 +thing_c_total 14123.232 +# EOF +`, + expected: []parsedEntry{ + { + m: "thing", + help: "histogram with _created as first line", + }, + { + m: "thing", + typ: model.MetricTypeHistogram, + }, + { + m: `thing_count`, + ct: nil, // Should be int64p(1520872607123). 
+ }, + { + m: `thing_sum`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="0.0"}`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="+Inf"}`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: "thing_c", + help: "counter with _created as first line", + }, + { + m: "thing_c", + typ: model.MetricTypeCounter, + }, + { + m: `thing_c_total`, + ct: nil, // Should be int64p(1520872607123). + }, + }, + }, - } - - p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - i := 0 - - var res labels.Labels - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - p.Metric(&res) - - if ct := p.CreatedTimestamp(); exp[i].isErr { - require.Nil(t, ct) - } else { - require.Equal(t, *exp[i].ct, *ct) - } - default: - i++ - continue - } - i++ + { + // TODO(bwplotka): Kind of correct behaviour? If yes, let's move it to the OK tests above. + name: "maybe counter with no meta", + input: `foo_total 17.0 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 +foo_created{a="b"} 1520872608.123 +# EOF +`, + expected: []parsedEntry{ + { + m: `foo_total`, + }, + { + m: `foo_created`, + }, + { + m: `foo_total{a="b"}`, + }, + { + m: `foo_created{a="b"}`, + }, + }, + }, + } { + t.Run(fmt.Sprintf("case=%v", tcase.name), func(t *testing.T) { + p := NewOpenMetricsParser([]byte(tcase.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + got := testParse(t, p) + resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only. + requireEntries(t, tcase.expected, got) + }) } } -func TestDeepCopy(t *testing.T) { - input := []byte(`# HELP go_goroutines A gauge goroutines. -# TYPE go_goroutines gauge -go_goroutines 33 123.123 -# TYPE go_gc_duration_seconds summary -go_gc_duration_seconds -go_gc_duration_seconds_created`) - - st := labels.NewSymbolTable() - parser := NewOpenMetricsParser(input, st, WithOMParserCTSeriesSkipped()).(*OpenMetricsParser) - - // Modify the original parser state - _, err := parser.Next() - require.NoError(t, err) - require.Equal(t, "go_goroutines", string(parser.l.b[parser.offsets[0]:parser.offsets[1]])) - require.True(t, parser.skipCTSeries) - - // Create a deep copy of the parser - copyParser := deepCopy(parser) - etype, err := copyParser.Next() - require.NoError(t, err) - require.Equal(t, EntryType, etype) - require.True(t, parser.skipCTSeries) - require.False(t, copyParser.skipCTSeries) - - // Modify the original parser further - parser.Next() - parser.Next() - parser.Next() - require.Equal(t, "go_gc_duration_seconds", string(parser.l.b[parser.offsets[0]:parser.offsets[1]])) - require.Equal(t, "summary", string(parser.mtype)) - require.False(t, copyParser.skipCTSeries) - require.True(t, parser.skipCTSeries) - - // Ensure the copy remains unchanged - copyParser.Next() - copyParser.Next() - require.Equal(t, "go_gc_duration_seconds", string(copyParser.l.b[copyParser.offsets[0]:copyParser.offsets[1]])) - require.False(t, copyParser.skipCTSeries) +func resetValAndLset(e []parsedEntry) { + for i := range e { + e[i].v = 0 + e[i].lset = labels.EmptyLabels() + } } diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index a611f3aea7..0ab932c665 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start -
value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) } @@ -502,7 +503,7 @@ func unreplace(s string) string { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } func parseFloat(s string) (float64, error) { diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index 7971d23b7e..e8cf66f539 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -14,37 +14,15 @@ package textparse import ( - "bytes" - "errors" "io" - "os" - "strings" "testing" - "github.com/klauspost/compress/gzip" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/prometheus/common/expfmt" - "github.com/prometheus/common/model" - - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" ) -type expectedParse struct { - lset labels.Labels - m string - t *int64 - v float64 - typ model.MetricType - help string - unit string - comment string - e *exemplar.Exemplar - ct *int64 -} - func TestPromParse(t *testing.T) { input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. # TYPE go_gc_duration_seconds summary @@ -53,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. +# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423 +prometheus_http_request_duration_seconds_sum{handler="/"} 2000 +prometheus_http_request_duration_seconds_count{handler="/"} 1423 # Hrandom comment starting with prefix of HELP # wind_speed{A="2",c="3"} 12345 @@ -72,13 +57,12 @@ some:aggregate:rate5m{a_b="c"} 1 go_goroutines 33 123123 _metric_starting_with_underscore 1 testmetric{_label_starting_with_underscore="foo"} 1 -testmetric{label="\"bar\""} 1` +testmetric{label="\"bar\""} 1 +testmetric{le="10"} 1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" - int64p := func(x int64) *int64 { return &x } - - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -88,7 +72,7 @@ testmetric{label="\"bar\""} 1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25",}`, v: 7.424100000000001e-05, @@ -105,6 +89,32 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: "prometheus_http_request_duration_seconds", + help: "Histogram of latencies for HTTP requests.", + }, { + m: "prometheus_http_request_duration_seconds", + typ: model.MetricTypeHistogram, + }, { + m: 
`prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`, + v: 423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"), + }, { + m: `prometheus_http_request_duration_seconds_sum{handler="/"}`, + v: 2000, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"), + }, { + m: `prometheus_http_request_duration_seconds_count{handler="/"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"), }, { comment: "# Hrandom comment starting with prefix of HELP", }, { @@ -140,7 +150,7 @@ testmetric{label="\"bar\""} 1` v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), }, { - // NOTE: Unlike OpenMetrics, Promparse allows spaces between label terms. This appears to be unintended and should probably be fixed. + // NOTE: Unlike OpenMetrics, PromParser allows spaces between label terms. This appears to be unintended and should probably be fixed. m: `go_gc_duration_seconds { quantile = "2.0" a = "b" }`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "2.0", "a", "b"), @@ -175,6 +185,10 @@ testmetric{label="\"bar\""} 1` m: "testmetric{label=\"\\\"bar\\\"\"}", v: 1, lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: `testmetric{le="10"}`, + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "le", "10"), }, { m: "metric", help: "foo\x00bar", @@ -186,80 +200,8 @@ testmetric{label="\"bar\""} 1` } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) -} - -func checkParseResults(t *testing.T, p Parser, exp []expectedParse) { - checkParseResultsWithCT(t, p, exp, false) -} - -func checkParseResultsWithCT(t *testing.T, p Parser, exp []expectedParse, ctLinesRemoved bool) { - i := 0 - - var res labels.Labels - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - p.Metric(&res) - - if ctLinesRemoved { - // Are CT series skipped? 
- _, typ := p.Type() - if TypeRequiresCT(typ) && strings.HasSuffix(res.Get(labels.MetricName), "_created") { - t.Fatalf("we exped created lines skipped") - } - } - - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].t, ts) - require.Equal(t, exp[i].v, v) - testutil.RequireEqual(t, exp[i].lset, res) - - var e exemplar.Exemplar - found := p.Exemplar(&e) - if exp[i].e == nil { - require.False(t, found) - } else { - require.True(t, found) - testutil.RequireEqual(t, *exp[i].e, e) - } - if ct := p.CreatedTimestamp(); ct != nil { - require.Equal(t, *exp[i].ct, *ct) - } else { - require.Nil(t, exp[i].ct) - } - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].typ, typ) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].help, string(h)) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].unit, string(u)) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment())) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8PromParse(t *testing.T) { @@ -283,7 +225,7 @@ func TestUTF8PromParse(t *testing.T) { {"go.gc_duration_seconds_count"} 99 {"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -293,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), }, { m: `{"go.gc_duration_seconds",quantile="0.25",}`, v: 7.424100000000001e-05, @@ -339,7 +281,8 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"), } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestPromParseErrors(t *testing.T) { @@ -423,8 +366,7 @@ func TestPromParseErrors(t *testing.T) { for err == nil { _, err = p.Next() } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } @@ -483,194 +425,6 @@ func TestPromNullByteHandling(t *testing.T) { continue } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) - } -} - -const ( - promtestdataSampleCount = 410 -) - -func BenchmarkParse(b *testing.B) { - for parserName, parser := range map[string]func([]byte, *labels.SymbolTable) Parser{ - "prometheus": NewPromParser, - "openmetrics": func(b []byte, st *labels.SymbolTable) Parser { - return NewOpenMetricsParser(b, st) - }, - } { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - buf, err := io.ReadAll(f) - require.NoError(b, err) - - b.Run(parserName+"/no-decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ 
:= p.Series() - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - var res labels.Labels - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric-reuse/"+fn, func(b *testing.B) { - total := 0 - var res labels.Labels - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run("expfmt-text/"+fn, func(b *testing.B) { - if parserName != "prometheus" { - b.Skip() - } - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - total := 0 - - for i := 0; i < b.N; i += promtestdataSampleCount { - decSamples := make(model.Vector, 0, 50) - sdec := expfmt.SampleDecoder{ - Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(expfmt.TypeTextPlain)), - Opts: &expfmt.DecodeOptions{ - Timestamp: model.TimeFromUnixNano(0), - }, - } - - for { - if err = sdec.Decode(&decSamples); err != nil { - break - } - total += len(decSamples) - decSamples = decSamples[:0] - } - } - _ = total - }) - } - } -} - -func BenchmarkGzip(b *testing.B) { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - b.Run(fn, func(b *testing.B) { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - var buf bytes.Buffer - gw := gzip.NewWriter(&buf) - - n, err := io.Copy(gw, f) - require.NoError(b, err) - require.NoError(b, gw.Close()) - - gbuf, err := io.ReadAll(&buf) - require.NoError(b, err) - - k := b.N / promtestdataSampleCount - - b.ReportAllocs() - b.SetBytes(n / promtestdataSampleCount) - b.ResetTimer() - - total := 0 - - for i := 0; i < k; i++ { - gr, err := gzip.NewReader(bytes.NewReader(gbuf)) - require.NoError(b, err) - - d, err := io.ReadAll(gr) - require.NoError(b, err) - require.NoError(b, gr.Close()) - - total += len(d) - } - _ = total - }) + require.EqualError(t, err, c.err, "test %d", i) } } diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index e384a75fca..a77e1d728f 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -20,7 +20,9 @@ import ( "fmt" "io" "math" + "strconv" "strings" + "sync" "unicode/utf8" "github.com/gogo/protobuf/proto" @@ -34,6 +36,15 @@ import ( dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) +// floatFormatBufPool is exclusively used in formatOpenMetricsFloat. +var floatFormatBufPool = sync.Pool{ + New: func() interface{} { + // To contain at most 17 digits and additional syntax for a float64. 
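+ // strconv.AppendFloat with format 'g' and precision -1 emits at most + // 17 significant digits plus a sign, a decimal point, and an exponent + // such as "e-308" (worst case "-2.2250738585072014e-308", exactly 24 + // bytes), so this capacity should avoid reallocations in practice.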
+ b := make([]byte, 0, 24) + return &b + }, +} + // ProtobufParser is a very inefficient way of unmarshaling the old Prometheus // protobuf format and then present it as if it were parsed by a // Prometheus-2-style text parser. This is only done so that we can easily plug @@ -457,6 +468,12 @@ func (p *ProtobufParser) Next() (Entry, error) { p.state = EntryHelp case EntryHelp: + if p.mf.Unit != "" { + p.state = EntryUnit + } else { + p.state = EntryType + } + case EntryUnit: p.state = EntryType case EntryType: t := p.mf.GetType() @@ -604,7 +621,7 @@ func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) { return totalLength, mf.Unmarshal(b[varIntLength:totalLength]) } -// formatOpenMetricsFloat works like the usual Go string formatting of a fleat +// formatOpenMetricsFloat works like the usual Go string formatting of a float // but appends ".0" if the resulting number would otherwise contain neither a // "." nor an "e". func formatOpenMetricsFloat(f float64) string { @@ -623,11 +640,15 @@ func formatOpenMetricsFloat(f float64) string { case math.IsInf(f, -1): return "-Inf" } - s := fmt.Sprint(f) - if strings.ContainsAny(s, "e.") { - return s + bp := floatFormatBufPool.Get().(*[]byte) + defer floatFormatBufPool.Put(bp) + + *bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64) + if bytes.ContainsAny(*bp, "e.") { + return string(*bp) } - return s + ".0" + *bp = append(*bp, '.', '0') + return string(*bp) } // isNativeHistogram returns false iff the provided histogram has no spans at diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index cf34ae52df..065459a69a 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -16,8 +16,6 @@ package textparse import ( "bytes" "encoding/binary" - "errors" - "io" "testing" "github.com/gogo/protobuf/proto" @@ -27,12 +25,12 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" - dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) -func createTestProtoBuf(t *testing.T) *bytes.Buffer { +func createTestProtoBuf(t testing.TB) *bytes.Buffer { + t.Helper() + testMetricFamilies := []string{ `name: "go_build_info" help: "Build information about the main Go module." @@ -411,6 +409,49 @@ metric: < > > +`, + `name: "test_histogram3" +help: "Similar histogram as before but now with integer buckets." +type: HISTOGRAM +metric: < + histogram: < + sample_count: 6 + sample_sum: 50 + bucket: < + cumulative_count: 2 + upper_bound: -20 + > + bucket: < + cumulative_count: 4 + upper_bound: 20 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: 15 + timestamp: < + seconds: 1625851153 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 6 + upper_bound: 30 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: 25 + > + > + schema: 0 + zero_threshold: 0 + > +> + `, `name: "test_histogram_family" help: "Test histogram metric family with two very simple histograms."
@@ -783,32 +824,17 @@ metric: < } func TestProtobufParse(t *testing.T) { - type parseResult struct { - lset labels.Labels - m string - t int64 - v float64 - typ model.MetricType - help string - unit string - comment string - shs *histogram.Histogram - fhs *histogram.FloatHistogram - e []exemplar.Exemplar - ct int64 - } - inputBuf := createTestProtoBuf(t) scenarios := []struct { name string parser Parser - expected []parseResult + expected []parsedEntry }{ { name: "ignore classic buckets of native histograms", parser: NewProtobufParser(inputBuf.Bytes(), false, labels.NewSymbolTable()), - expected: []parseResult{ + expected: []parsedEntry{ { m: "go_build_info", help: "Build information about the main Go module.", @@ -830,6 +856,9 @@ func TestProtobufParse(t *testing.T) { { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", + }, + { + m: "go_memstats_alloc_bytes_total", unit: "bytes", }, { @@ -842,7 +871,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, @@ -856,7 +885,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", @@ -872,7 +901,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -893,7 +922,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -907,7 +936,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -929,7 +958,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -943,7 +972,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -964,7 +993,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -978,7 +1007,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1000,7 +1029,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1041,7 +1070,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), 
Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, @@ -1052,7 +1081,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, @@ -1064,6 +1093,66 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", @@ -1235,7 +1324,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), @@ -1251,7 +1340,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), @@ -1259,7 +1348,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), @@ -1274,7 +1363,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -1294,7 +1383,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -1314,7 +1403,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1335,7 +1424,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, @@ -1350,7 
+1439,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1371,7 +1460,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1380,16 +1469,16 @@ func TestProtobufParse(t *testing.T) { { name: "parse classic and native buckets", parser: NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()), - expected: []parseResult{ - { // 0 + expected: []parsedEntry{ + { m: "go_build_info", help: "Build information about the main Go module.", }, - { // 1 + { m: "go_build_info", typ: model.MetricTypeGauge, }, - { // 2 + { m: "go_build_info\xFFchecksum\xFF\xFFpath\xFFgithub.com/prometheus/client_golang\xFFversion\xFF(devel)", v: 1, lset: labels.FromStrings( @@ -1399,51 +1488,55 @@ func TestProtobufParse(t *testing.T) { "version", "(devel)", ), }, - { // 3 + { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", }, - { // 4 + { + m: "go_memstats_alloc_bytes_total", + unit: "bytes", + }, + { m: "go_memstats_alloc_bytes_total", typ: model.MetricTypeCounter, }, - { // 5 + { m: "go_memstats_alloc_bytes_total", v: 1.546544e+06, lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, - { // 6 + { m: "something_untyped", help: "Just to test the untyped type.", }, - { // 7 + { m: "something_untyped", typ: model.MetricTypeUnknown, }, - { // 8 + { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", ), }, - { // 9 + { m: "test_histogram", help: "Test histogram with many buckets removed to keep it manageable in size.", }, - { // 10 + { m: "test_histogram", typ: model.MetricTypeHistogram, }, - { // 11 + { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1464,79 +1557,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 12 + { m: "test_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_count", ), }, - { // 13 + { m: "test_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_sum", ), }, - { // 14 + { m: "test_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 15 + { m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 16 + { m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", 
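// Note: ProtobufParser has no original exposition text to return, so it // synthesizes the metric bytes itself, joining the series name and the // label name/value pairs with 0xFF separator bytes, which is why these // expectations look like "name\xffle\xffvalue".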
- t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 17 + { m: "test_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "+Inf", ), }, - { // 18 + { m: "test_gauge_histogram", help: "Like test_histogram but as gauge histogram.", }, - { // 19 + { m: "test_gauge_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 20 + { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -1558,79 +1651,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 21 + { m: "test_gauge_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_count", ), }, - { // 22 + { m: "test_gauge_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_histogram_sum", ), }, - { // 23 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 24 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 25 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 26 + { m: "test_gauge_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "+Inf", ), }, - { // 27 + { m: "test_float_histogram", help: "Test float histogram with many buckets removed to keep it manageable in size.", }, - { // 28 + { m: "test_float_histogram", typ: model.MetricTypeHistogram, }, - { // 29 + { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -1651,79 +1744,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 30 + { m: "test_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_count", ), }, - { // 31 + { m: "test_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", 
"test_float_histogram_sum", ), }, - { // 32 + { m: "test_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 33 + { m: "test_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 34 + { m: "test_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 35 + { m: "test_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "+Inf", ), }, - { // 36 + { m: "test_gauge_float_histogram", help: "Like test_float_histogram but as gauge histogram.", }, - { // 37 + { m: "test_gauge_float_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 38 + { m: "test_gauge_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1745,91 +1838,91 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 39 + { m: "test_gauge_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_count", ), }, - { // 40 + { m: "test_gauge_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_sum", ), }, - { // 41 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 42 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 43 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 44 + { m: "test_gauge_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "+Inf", ), }, - { // 45 + { m: "test_histogram2", help: "Similar histogram as before but now without sparse 
buckets.", }, - { // 46 + { m: "test_histogram2", typ: model.MetricTypeHistogram, }, - { // 47 + { m: "test_histogram2_count", v: 175, lset: labels.FromStrings( "__name__", "test_histogram2_count", ), }, - { // 48 + { m: "test_histogram2_sum", v: 0.000828, lset: labels.FromStrings( "__name__", "test_histogram2_sum", ), }, - { // 49 + { m: "test_histogram2_bucket\xffle\xff-0.00048", v: 2, lset: labels.FromStrings( @@ -1837,29 +1930,29 @@ func TestProtobufParse(t *testing.T) { "le", "-0.00048", ), }, - { // 50 + { m: "test_histogram2_bucket\xffle\xff-0.00038", v: 4, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, - { // 51 + { m: "test_histogram2_bucket\xffle\xff1.0", v: 16, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, - { // 52 + { m: "test_histogram2_bucket\xffle\xff+Inf", v: 175, lset: labels.FromStrings( @@ -1867,15 +1960,75 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 53 + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, + { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", }, - { // 54 + { m: "test_histogram_family", typ: model.MetricTypeHistogram, }, - { // 55 + { m: "test_histogram_family\xfffoo\xffbar", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1893,7 +2046,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 56 + { m: "test_histogram_family_count\xfffoo\xffbar", v: 5, lset: labels.FromStrings( @@ -1901,7 +2054,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 57 + { m: "test_histogram_family_sum\xfffoo\xffbar", v: 12.1, lset: labels.FromStrings( @@ -1909,7 +2062,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 58 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff1.1", v: 2, lset: labels.FromStrings( @@ -1918,7 +2071,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 59 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff2.2", v: 3, lset: 
labels.FromStrings( @@ -1927,7 +2080,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 60 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff+Inf", v: 5, lset: labels.FromStrings( @@ -1936,7 +2089,7 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 61 + { m: "test_histogram_family\xfffoo\xffbaz", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1954,7 +2107,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 62 + { m: "test_histogram_family_count\xfffoo\xffbaz", v: 6, lset: labels.FromStrings( @@ -1962,7 +2115,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 63 + { m: "test_histogram_family_sum\xfffoo\xffbaz", v: 13.1, lset: labels.FromStrings( @@ -1970,7 +2123,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 64 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff1.1", v: 1, lset: labels.FromStrings( @@ -1979,7 +2132,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 65 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff2.2", v: 5, lset: labels.FromStrings( @@ -1988,7 +2141,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 66 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff+Inf", v: 6, lset: labels.FromStrings( @@ -1997,15 +2150,15 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 67 + { m: "test_float_histogram_with_zerothreshold_zero", help: "Test float histogram with a zero threshold of zero.", }, - { // 68 + { m: "test_float_histogram_with_zerothreshold_zero", typ: model.MetricTypeHistogram, }, - { // 69 + { m: "test_float_histogram_with_zerothreshold_zero", fhs: &histogram.FloatHistogram{ Count: 5.0, @@ -2021,15 +2174,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_float_histogram_with_zerothreshold_zero", ), }, - { // 70 + { m: "rpc_durations_seconds", help: "RPC latency distributions.", }, - { // 71 + { m: "rpc_durations_seconds", typ: model.MetricTypeSummary, }, - { // 72 + { m: "rpc_durations_seconds_count\xffservice\xffexponential", v: 262, lset: labels.FromStrings( @@ -2037,7 +2190,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 73 + { m: "rpc_durations_seconds_sum\xffservice\xffexponential", v: 0.00025551262820703587, lset: labels.FromStrings( @@ -2045,7 +2198,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 74 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.5", v: 6.442786329648548e-07, lset: labels.FromStrings( @@ -2054,7 +2207,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 75 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.9", v: 1.9435742936658396e-06, lset: labels.FromStrings( @@ -2063,7 +2216,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 76 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.99", v: 4.0471608667037015e-06, lset: labels.FromStrings( @@ -2072,37 +2225,37 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 77 + { m: "without_quantiles", help: "A summary without quantiles.", }, - { // 78 + { m: "without_quantiles", typ: model.MetricTypeSummary, }, - { // 79 + { m: "without_quantiles_count", v: 42, lset: labels.FromStrings( "__name__", "without_quantiles_count", ), }, - { // 80 + { m: "without_quantiles_sum", v: 1.234, lset: labels.FromStrings( "__name__", "without_quantiles_sum", ), 
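// parsedEntry stores t and ct as *int64 (built with the int64p helper), // so a series without a timestamp is represented as nil rather than the // ambiguous zero value the old parseResult compared against.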
}, - { // 81 + { m: "empty_histogram", help: "A histogram without observations and with a zero threshold of zero but with a no-op span to identify it as a native histogram.", }, - { // 82 + { m: "empty_histogram", typ: model.MetricTypeHistogram, }, - { // 83 + { m: "empty_histogram", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -2113,57 +2266,57 @@ func TestProtobufParse(t *testing.T) { "__name__", "empty_histogram", ), }, - { // 84 + { m: "test_counter_with_createdtimestamp", help: "A counter with a created timestamp.", }, - { // 85 + { m: "test_counter_with_createdtimestamp", typ: model.MetricTypeCounter, }, - { // 86 + { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), }, - { // 87 + { m: "test_summary_with_createdtimestamp", help: "A summary with a created timestamp.", }, - { // 88 + { m: "test_summary_with_createdtimestamp", typ: model.MetricTypeSummary, }, - { // 89 + { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), }, - { // 90 + { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), }, - { // 91 + { m: "test_histogram_with_createdtimestamp", help: "A histogram with a created timestamp.", }, - { // 92 + { m: "test_histogram_with_createdtimestamp", typ: model.MetricTypeHistogram, }, - { // 93 + { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -2173,17 +2326,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_createdtimestamp", ), }, - { // 94 + { m: "test_gaugehistogram_with_createdtimestamp", help: "A gauge histogram with a created timestamp.", }, - { // 95 + { m: "test_gaugehistogram_with_createdtimestamp", typ: model.MetricTypeGaugeHistogram, }, - { // 96 + { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -2193,17 +2346,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_gaugehistogram_with_createdtimestamp", ), }, - { // 97 + { m: "test_histogram_with_native_histogram_exemplars", help: "A histogram with native histogram exemplars.", }, - { // 98 + { m: "test_histogram_with_native_histogram_exemplars", typ: model.MetricTypeHistogram, }, - { // 99 + { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2224,80 +2377,80 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, }, - { // 100 + { m: "test_histogram_with_native_histogram_exemplars_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_count", ), }, - { // 101 + { m: "test_histogram_with_native_histogram_exemplars_sum", - t: 1234568, + 
t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_sum", ), }, - { // 102 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0004899999999999998", ), }, - { // 103 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 104 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 105 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "+Inf", ), }, - { // 106 + { m: "test_histogram_with_native_histogram_exemplars2", help: "Another histogram with native histogram exemplars.", }, - { // 107 + { m: "test_histogram_with_native_histogram_exemplars2", typ: model.MetricTypeHistogram, }, - { // 108 + { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2318,56 +2471,56 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 109 + { m: "test_histogram_with_native_histogram_exemplars2_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_count", ), }, - { // 110 + { m: "test_histogram_with_native_histogram_exemplars2_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_sum", ), }, - { // 111 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0004899999999999998", ), }, - { // 112 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0003899999999999998", ), }, - { // 113 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0002899999999999998", ), }, - { // 114 
+ { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", @@ -2381,94 +2534,11 @@ func TestProtobufParse(t *testing.T) { for _, scenario := range scenarios { t.Run(scenario.name, func(t *testing.T) { var ( - i int - res labels.Labels p = scenario.parser exp = scenario.expected ) - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - var e exemplar.Exemplar - p.Metric(&res) - eFound := p.Exemplar(&e) - ct := p.CreatedTimestamp() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - require.Equal(t, exp[i].v, v, "i: %d", i) - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - if len(exp[i].e) == 0 { - require.False(t, eFound, "i: %d", i) - } else { - require.True(t, eFound, "i: %d", i) - testutil.RequireEqual(t, exp[i].e[0], e, "i: %d", i) - require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i) - } - if exp[i].ct != 0 { - require.NotNilf(t, ct, "i: %d", i) - require.Equal(t, exp[i].ct, *ct, "i: %d", i) - } else { - require.Nilf(t, ct, "i: %d", i) - } - - case EntryHistogram: - m, ts, shs, fhs := p.Histogram() - p.Metric(&res) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if shs != nil { - require.Equal(t, exp[i].shs, shs, "i: %d", i) - } else { - require.Equal(t, exp[i].fhs, fhs, "i: %d", i) - } - j := 0 - for e := (exemplar.Exemplar{}); p.Exemplar(&e); j++ { - testutil.RequireEqual(t, exp[i].e[j], e, "i: %d", i) - e = exemplar.Exemplar{} - } - require.Len(t, exp[i].e, j, "not enough exemplars found, i: %d", i) - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].typ, typ, "i: %d", i) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].help, string(h), "i: %d", i) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].unit, string(u), "i: %d", i) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment()), "i: %d", i) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) }) } } diff --git a/model/textparse/testdata/omhistogramdata.txt b/model/textparse/testdata/omhistogramdata.txt new file mode 100644 index 0000000000..1876168355 --- /dev/null +++ b/model/textparse/testdata/omhistogramdata.txt @@ -0,0 +1,45 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.05"} 0 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5001"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5001"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 
+golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 +# EOF \ No newline at end of file diff --git a/model/textparse/testdata/omtestdata.txt b/model/textparse/testdata/omtestdata.txt new file mode 100644 index 0000000000..0f5f78b8b9 --- /dev/null +++ b/model/textparse/testdata/omtestdata.txt @@ -0,0 +1,64 @@ +# HELP go_build_info Build information about the main Go module. +# TYPE go_build_info gauge +go_build_info{checksum="",path="",version=""} 1.0 +# HELP promhttp_metric_handler_errors Total number of internal errors encountered by the promhttp metric handler. +# TYPE promhttp_metric_handler_errors counter +promhttp_metric_handler_errors_total{cause="encoding"} 0.0 +promhttp_metric_handler_errors_created{cause="encoding"} 1.726839813016397e+09 +promhttp_metric_handler_errors_total{cause="gathering"} 0.0 +promhttp_metric_handler_errors_created{cause="gathering"} 1.726839813016395e+09 +# HELP rpc_durations_histogram_seconds RPC latency distributions. +# TYPE rpc_durations_histogram_seconds histogram +rpc_durations_histogram_seconds_bucket{le="-0.00099"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.00089"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0007899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0006899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0005899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0004899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0003899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0002899999999999998"} 3 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram_seconds_bucket{le="-0.0001899999999999998"} 5 # {dummyID="84741"} -0.00020178290006788965 1.726839814829977e+09 +rpc_durations_histogram_seconds_bucket{le="-8.999999999999979e-05"} 5 +rpc_durations_histogram_seconds_bucket{le="1.0000000000000216e-05"} 8 # {dummyID="19206"} -4.6156147425468016e-05 1.7268398151337721e+09 +rpc_durations_histogram_seconds_bucket{le="0.00011000000000000022"} 9 # {dummyID="3974"} 9.528436760156754e-05 1.726839814526797e+09 +rpc_durations_histogram_seconds_bucket{le="0.00021000000000000023"} 11 # {dummyID="29640"} 0.00017459624183458996 1.7268398139220061e+09 +rpc_durations_histogram_seconds_bucket{le="0.0003100000000000002"} 15 # {dummyID="9818"} 0.0002791130914009552 1.7268398149821382e+09 +rpc_durations_histogram_seconds_bucket{le="0.0004100000000000002"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0005100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0006100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0007100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0008100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0009100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="+Inf"} 15 +rpc_durations_histogram_seconds_sum -8.452185437166741e-05 +rpc_durations_histogram_seconds_count 15 +rpc_durations_histogram_seconds_created 1.726839813016302e+09 +# HELP rpc_durations_seconds RPC latency distributions. 
+# TYPE rpc_durations_seconds summary +rpc_durations_seconds{service="exponential",quantile="0.5"} 7.689368882420941e-07 +rpc_durations_seconds{service="exponential",quantile="0.9"} 1.6537614174305048e-06 +rpc_durations_seconds{service="exponential",quantile="0.99"} 2.0965499063061924e-06 +rpc_durations_seconds_sum{service="exponential"} 2.0318666372575776e-05 +rpc_durations_seconds_count{service="exponential"} 22 +rpc_durations_seconds_created{service="exponential"} 1.7268398130168908e+09 +rpc_durations_seconds{service="normal",quantile="0.5"} -5.066758674917046e-06 +rpc_durations_seconds{service="normal",quantile="0.9"} 0.0002935723711788224 +rpc_durations_seconds{service="normal",quantile="0.99"} 0.0003023094636293776 +rpc_durations_seconds_sum{service="normal"} -8.452185437166741e-05 +rpc_durations_seconds_count{service="normal"} 15 +rpc_durations_seconds_created{service="normal"} 1.726839813016714e+09 +rpc_durations_seconds{service="uniform",quantile="0.5"} 9.005014931474918e-05 +rpc_durations_seconds{service="uniform",quantile="0.9"} 0.00017801230208182325 +rpc_durations_seconds{service="uniform",quantile="0.99"} 0.00018641524538180192 +rpc_durations_seconds_sum{service="uniform"} 0.0011666095700533677 +rpc_durations_seconds_count{service="uniform"} 11 +rpc_durations_seconds_created{service="uniform"} 1.72683981301684e+09 +# HELP rpc_requests Total number of RPC requests received. +# TYPE rpc_requests counter +rpc_requests_total{service="exponential"} 22.0 +rpc_requests_created{service="exponential"} 1.726839813016893e+09 +rpc_requests_total{service="normal"} 15.0 +rpc_requests_created{service="normal"} 1.726839813016717e+09 +rpc_requests_total{service="uniform"} 11.0 +rpc_requests_created{service="uniform"} 1.7268398130168471e+09 +# EOF diff --git a/model/textparse/promtestdata.nometa.txt b/model/textparse/testdata/promtestdata.nometa.txt similarity index 100% rename from model/textparse/promtestdata.nometa.txt rename to model/textparse/testdata/promtestdata.nometa.txt diff --git a/model/textparse/promtestdata.txt b/model/textparse/testdata/promtestdata.txt similarity index 100% rename from model/textparse/promtestdata.txt rename to model/textparse/testdata/promtestdata.txt diff --git a/notifier/notifier.go b/notifier/notifier.go index 218e4cb8c7..e970b67e6d 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -16,25 +16,28 @@ package notifier import ( "bytes" "context" + "crypto/md5" + "encoding/hex" "encoding/json" "fmt" "io" + "log/slog" "net/http" "net/url" "path" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-openapi/strfmt" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" "go.uber.org/atomic" + "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -117,7 +120,7 @@ type Manager struct { stopRequested chan struct{} alertmanagers map[string]*alertmanagerSet - logger log.Logger + logger *slog.Logger } // Options are the configurable parameters of a Handler. @@ -218,12 +221,12 @@ func do(ctx context.Context, client *http.Client, req *http.Request) (*http.Resp } // NewManager is the manager constructor. 
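// A nil logger falls back to a no-op slog logger (promslog.NewNopLogger).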
-func NewManager(o *Options, logger log.Logger) *Manager { +func NewManager(o *Options, logger *slog.Logger) *Manager { if o.Do == nil { o.Do = do } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } n := &Manager{ @@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs amSets := make(map[string]*alertmanagerSet) + // configToAlertmanagers maps each unique AlertmanagerConfig to its existing alertmanager set, + // so that known alertmanagers are not dropped and can be reused without waiting for SD updates when the config is applied. + configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers)) + for _, oldAmSet := range n.alertmanagers { + hash, err := oldAmSet.configHash() + if err != nil { + return err + } + configToAlertmanagers[hash] = oldAmSet + } for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) @@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { return err } + hash, err := ams.configHash() + if err != nil { + return err + } + + if oldAmSet, ok := configToAlertmanagers[hash]; ok { + ams.ams = oldAmSet.ams + ams.droppedAms = oldAmSet.droppedAms + } + amSets[k] = ams } @@ -319,7 +342,7 @@ func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) { }() wg.Wait() - level.Info(n.logger).Log("msg", "Notification manager stopped") + n.logger.Info("Notification manager stopped") } // sendLoop continuously consumes the notifications queue and sends alerts to @@ -376,20 +399,20 @@ func (n *Manager) sendOneBatch() { func (n *Manager) drainQueue() { if !n.opts.DrainOnShutdown { if n.queueLen() > 0 { - level.Warn(n.logger).Log("msg", "Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) + n.logger.Warn("Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) n.metrics.dropped.Add(float64(n.queueLen())) } return } - level.Info(n.logger).Log("msg", "Draining any remaining notifications...") + n.logger.Info("Draining any remaining notifications...") for n.queueLen() > 0 { n.sendOneBatch() } - level.Info(n.logger).Log("msg", "Remaining notifications drained") + n.logger.Info("Remaining notifications drained") } func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { @@ -399,7 +422,7 @@ func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { for id, tgroup := range tgs { am, ok := n.alertmanagers[id] if !ok { - level.Error(n.logger).Log("msg", "couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) + n.logger.Error("couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) continue } am.sync(tgroup) @@ -422,7 +445,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := len(alerts) - n.opts.QueueCapacity; d > 0 { alerts = alerts[d:] - level.Warn(n.logger).Log("msg", "Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) + n.logger.Warn("Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } @@ -431,7 +454,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := (len(n.queue) + len(alerts)) - n.opts.QueueCapacity; d > 0 { n.queue = n.queue[d:] - level.Warn(n.logger).Log("msg", "Alert notification queue full,
dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } n.queue = append(n.queue, alerts...) @@ -519,10 +542,10 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { begin := time.Now() - // v1Payload and v2Payload represent 'alerts' marshaled for Alertmanager API - // v1 or v2. Marshaling happens below. Reference here is for caching between + // cachedPayload represent 'alerts' marshaled for Alertmanager API v2. + // Marshaling happens below. Reference here is for caching between // for loop iterations. - var v1Payload, v2Payload []byte + var cachedPayload []byte n.mtx.RLock() amSets := n.alertmanagers @@ -553,42 +576,29 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { continue } // We can't use the cached values from previous iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } switch ams.cfg.APIVersion { - case config.AlertmanagerAPIVersionV1: - { - if v1Payload == nil { - v1Payload, err = json.Marshal(amAlerts) - if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v1 failed", "err", err) - ams.mtx.RUnlock() - return false - } - } - - payload = v1Payload - } case config.AlertmanagerAPIVersionV2: { - if v2Payload == nil { + if cachedPayload == nil { openAPIAlerts := alertsToOpenAPIAlerts(amAlerts) - v2Payload, err = json.Marshal(openAPIAlerts) + cachedPayload, err = json.Marshal(openAPIAlerts) if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v2 failed", "err", err) + n.logger.Error("Encoding alerts for Alertmanager API v2 failed", "err", err) ams.mtx.RUnlock() return false } } - payload = v2Payload + payload = cachedPayload } default: { - level.Error(n.logger).Log( - "msg", fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), + n.logger.Error( + fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), "err", err, ) ams.mtx.RUnlock() @@ -598,7 +608,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { if len(ams.cfg.AlertRelabelConfigs) > 0 { // We can't use the cached values on the next iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } for _, am := range ams.ams { @@ -609,7 +619,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) { if err := n.sendOne(ctx, client, url, payload); err != nil { - level.Error(n.logger).Log("alertmanager", url, "count", count, "msg", "Error sending alert", "err", err) + n.logger.Error("Error sending alert", "alertmanager", url, "count", count, "err", err) n.metrics.errors.WithLabelValues(url).Inc() } else { numSuccess.Inc() @@ -689,7 +699,7 @@ func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []b // // Stop is safe to call multiple times. 
func (n *Manager) Stop() { - level.Info(n.logger).Log("msg", "Stopping notification manager...") + n.logger.Info("Stopping notification manager...") n.stopOnce.Do(func() { close(n.stopRequested) @@ -724,10 +734,10 @@ type alertmanagerSet struct { mtx sync.RWMutex ams []alertmanager droppedAms []alertmanager - logger log.Logger + logger *slog.Logger } -func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { +func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger *slog.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager") if err != nil { return nil, err @@ -761,7 +771,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { for _, tg := range tgs { ams, droppedAms, err := AlertmanagerFromGroup(tg, s.cfg) if err != nil { - level.Error(s.logger).Log("msg", "Creating discovered Alertmanagers failed", "err", err) + s.logger.Error("Creating discovered Alertmanagers failed", "err", err) continue } allAms = append(allAms, ams...) @@ -770,6 +780,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { s.mtx.Lock() defer s.mtx.Unlock() + previousAms := s.ams // Set new Alertmanagers and deduplicate them along their unique URL. s.ams = []alertmanager{} s.droppedAms = []alertmanager{} @@ -789,6 +800,26 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { seen[us] = struct{}{} s.ams = append(s.ams, am) } + // Now remove counters for any removed Alertmanagers. + for _, am := range previousAms { + us := am.url().String() + if _, ok := seen[us]; ok { + continue + } + s.metrics.latency.DeleteLabelValues(us) + s.metrics.sent.DeleteLabelValues(us) + s.metrics.errors.DeleteLabelValues(us) + seen[us] = struct{}{} + } +} + +func (s *alertmanagerSet) configHash() (string, error) { + b, err := yaml.Marshal(s.cfg) + if err != nil { + return "", err + } + hash := md5.Sum(b) + return hex.EncodeToString(hash[:]), nil } func postPath(pre string, v config.AlertmanagerAPIVersion) string { diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index cf922a537c..97b0274f29 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/config" + _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -49,27 +50,27 @@ func TestPostPath(t *testing.T) { }{ { in: "", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/prefix", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "/prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, } for _, c := range cases { - require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV1)) + require.Equal(t, c.out, postPath(c.in, 
config.AlertmanagerAPIVersionV2)) } } @@ -743,7 +744,7 @@ func TestHangingNotifier(t *testing.T) { // Initialize the discovery manager // This is relevant as the updates aren't sent continually in real life, but only each updatert. - // The old implementation of TestHangingNotifier didn't take that into acount. + // The old implementation of TestHangingNotifier didn't take that into account. ctx, cancel := context.WithCancel(context.Background()) defer cancel() reg := prometheus.NewRegistry() @@ -751,7 +752,7 @@ func TestHangingNotifier(t *testing.T) { require.NoError(t, err) sdManager := discovery.NewManager( ctx, - log.NewNopLogger(), + promslog.NewNopLogger(), reg, sdMetrics, discovery.Name("sd-manager"), @@ -1017,3 +1018,107 @@ func TestStop_DrainingEnabled(t *testing.T) { require.Equal(t, int64(2), alertsReceived.Load()) } + +func TestApplyConfig(t *testing.T) { + targetURL := "alertmanager:9093" + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(targetURL), + }, + }, + } + alertmanagerURL := fmt.Sprintf("http://%s/api/v2/alerts", targetURL) + + n := NewManager(&Options{}, nil) + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + // 1. Ensure known alertmanagers are not dropped during ApplyConfig. + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) + + // First, apply the config and reload. + require.NoError(t, n.ApplyConfig(cfg)) + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // Reapply the config. + require.NoError(t, n.ApplyConfig(cfg)) + // Ensure the known alertmanagers are not dropped. + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // 2. Ensure known alertmanagers are not dropped during ApplyConfig even when + // the config order changes. + s = ` +alerting: + alertmanagers: + - static_configs: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 1) + // Ensure no unnecessary alertmanagers are injected. + require.Empty(t, n.alertmanagers["config-0"].ams) + // Ensure the config order is taken into account. + ams := n.alertmanagers["config-1"].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + + // 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 2) + for cfgIdx := range 2 { + ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + } + + // 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig. 
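+	// Both configs below hash differently from every previously applied + // AlertmanagerConfig: path_prefix and relabel_configs are part of the + // YAML-marshaled config that configHash covers. No existing alertmanagers + // are therefore carried over, and none are reported before the next SD update.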
+ s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + path_prefix: /bar + - file_sd_configs: + - files: + - foo.json + relabel_configs: + - source_labels: ['__address__'] + regex: 'doesntmatter:1234' + action: drop +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Empty(t, n.Alertmanagers()) +} diff --git a/promql/bench_test.go b/promql/bench_test.go index 74e85b0548..943baceecb 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" @@ -117,7 +119,7 @@ func rangeQueryCases() []benchCase { }, // Holt-Winters and long ranges. { - expr: "holt_winters(a_X[1d], 0.3, 0.3)", + expr: "double_exponential_smoothing(a_X[1d], 0.3, 0.3)", }, { expr: "changes(a_X[1d])", @@ -380,6 +382,126 @@ func BenchmarkNativeHistograms(b *testing.B) { } } +func BenchmarkInfoFunction(b *testing.B) { + // Initialize test storage and generate test series data. + testStorage := teststorage.New(b) + defer testStorage.Close() + + start := time.Unix(0, 0) + end := start.Add(2 * time.Hour) + step := 30 * time.Second + + // Generate time series data for the benchmark. + generateInfoFunctionTestSeries(b, testStorage, 100, 2000, 3600) + + // Define test cases with queries to benchmark. + cases := []struct { + name string + query string + }{ + { + name: "Joining info metrics with other metrics with group_left example 1", + query: "rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info{k8s_cluster_name=\"us-east\"}", + }, + { + name: "Joining info metrics with other metrics with info() example 1", + query: `info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name="us-east"})`, + }, + { + name: "Joining info metrics with other metrics with group_left example 2", + query: "sum by (k8s_cluster_name, http_status_code) (rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info)", + }, + { + name: "Joining info metrics with other metrics with info() example 2", + query: `sum by (k8s_cluster_name, http_status_code) (info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name=~".+"}))`, + }, + } + + // Benchmark each query type. + for _, tc := range cases { + // Initialize a fresh PromQL engine for each benchmark case. + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } + engine := promql.NewEngine(opts) + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() // Stop the timer to exclude setup time. + qry, err := engine.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) + require.NoError(b, err) + b.StartTimer() + result := qry.Exec(context.Background()) + require.NoError(b, result.Err) + } + }) + } + + // Report allocations. + b.ReportAllocs() +} + +// Helper function to generate target_info and http_server_request_duration_seconds_count series for info function benchmarking. 
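+// It creates infoSeriesNum target_info series (half labeled k8s_cluster_name="us-east", half "eu-south") +// plus one http_server_request_duration_seconds_count series per status code (200, 400, 500), +// appending one sample per series every interval milliseconds for numIntervals steps.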
+func generateInfoFunctionTestSeries(tb testing.TB, stor *teststorage.TestStorage, infoSeriesNum, interval, numIntervals int) { + tb.Helper() + + ctx := context.Background() + statusCodes := []string{"200", "400", "500"} + + // Generate target_info metrics with instance and job labels, and k8s_cluster_name label. + // Generate http_server_request_duration_seconds_count metrics with instance and job labels, and http_status_code label. + // The classic target_info metric is a gauge. + metrics := make([]labels.Labels, 0, infoSeriesNum+len(statusCodes)) + for i := 0; i < infoSeriesNum; i++ { + clusterName := "us-east" + if i >= infoSeriesNum/2 { + clusterName = "eu-south" + } + metrics = append(metrics, labels.FromStrings( + "__name__", "target_info", + "instance", "instance"+strconv.Itoa(i), + "job", "job"+strconv.Itoa(i), + "k8s_cluster_name", clusterName, + )) + } + + for _, statusCode := range statusCodes { + metrics = append(metrics, labels.FromStrings( + "__name__", "http_server_request_duration_seconds_count", + "instance", "instance0", + "job", "job0", + "http_status_code", statusCode, + )) + } + + // Append the generated metrics and samples to the storage. + refs := make([]storage.SeriesRef, len(metrics)) + + for i := 0; i < numIntervals; i++ { + a := stor.Appender(context.Background()) + ts := int64(i * interval) + for j, metric := range metrics[:infoSeriesNum] { + ref, _ := a.Append(refs[j], metric, ts, 1) + refs[j] = ref + } + + for j, metric := range metrics[infoSeriesNum:] { + ref, _ := a.Append(refs[j+infoSeriesNum], metric, ts, float64(i)) + refs[j+infoSeriesNum] = ref + } + + require.NoError(tb, a.Commit()) + } + + stor.DB.ForceHeadMMap() // Ensure we have at most one head chunk for every series. + stor.DB.Compact(ctx) +} + func generateNativeHistogramSeries(app storage.Appender, numSeries int) error { commonLabels := []string{labels.MetricName, "native_histogram_series", "foo", "bar"} series := make([][]*histogram.Histogram, numSeries) diff --git a/promql/engine.go b/promql/engine.go index 1a5f828aed..56323748fe 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "reflect" "runtime" @@ -30,10 +31,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -43,6 +43,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" @@ -125,7 +126,11 @@ type QueryEngine interface { // QueryLogger is an interface that can be used to log all the queries logged // by the engine. type QueryLogger interface { - Log(...interface{}) error + Error(msg string, args ...any) + Info(msg string, args ...any) + Debug(msg string, args ...any) + Warn(msg string, args ...any) + With(args ...any) Close() error } @@ -288,7 +293,7 @@ type QueryTracker interface { // EngineOpts contains configuration options used when creating a new Engine. 
type EngineOpts struct { - Logger log.Logger + Logger *slog.Logger Reg prometheus.Registerer MaxSamples int Timeout time.Duration @@ -326,7 +331,7 @@ type EngineOpts struct { // Engine handles the lifetime of queries from beginning to end. // It is connected to a querier. type Engine struct { - logger log.Logger + logger *slog.Logger metrics *engineMetrics timeout time.Duration maxSamplesPerQuery int @@ -344,7 +349,7 @@ type Engine struct { // NewEngine returns a new engine. func NewEngine(opts EngineOpts) *Engine { if opts.Logger == nil { - opts.Logger = log.NewNopLogger() + opts.Logger = promslog.NewNopLogger() } queryResultSummary := prometheus.NewSummaryVec(prometheus.SummaryOpts{ @@ -403,7 +408,7 @@ func NewEngine(opts EngineOpts) *Engine { if opts.LookbackDelta == 0 { opts.LookbackDelta = defaultLookbackDelta if l := opts.Logger; l != nil { - level.Debug(l).Log("msg", "Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) + l.Debug("Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) } } @@ -455,7 +460,7 @@ func (ng *Engine) SetQueryLogger(l QueryLogger) { // not make reload fail; only log a warning. err := ng.queryLogger.Close() if err != nil { - level.Warn(ng.logger).Log("msg", "Error while closing the previous query log file", "err", err) + ng.logger.Warn("Error while closing the previous query log file", "err", err) } } @@ -632,23 +637,23 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws annota // The step provided by the user is in seconds. params["step"] = int64(eq.Interval / (time.Second / time.Nanosecond)) } - f := []interface{}{"params", params} + l.With("params", params) if err != nil { - f = append(f, "error", err) + l.With("error", err) } - f = append(f, "stats", stats.NewQueryStats(q.Stats())) + l.With("stats", stats.NewQueryStats(q.Stats())) if span := trace.SpanFromContext(ctx); span != nil { - f = append(f, "spanID", span.SpanContext().SpanID()) + l.With("spanID", span.SpanContext().SpanID()) } if origin := ctx.Value(QueryOrigin{}); origin != nil { for k, v := range origin.(map[string]interface{}) { - f = append(f, k, v) + l.With(k, v) } } - if err := l.Log(f...); err != nil { - ng.metrics.queryLogFailures.Inc() - level.Error(ng.logger).Log("msg", "can't log query", "err", err) - } + l.Info("promql query logged") + // TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors? + // ng.metrics.queryLogFailures.Inc() + // ng.logger.Error("can't log query", "err", err) } ng.queryLoggerLock.RUnlock() }() @@ -734,6 +739,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(start, start, 1) @@ -792,6 +798,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval) val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr) @@ -1004,6 +1011,8 @@ func extractGroupsFromPath(p []parser.Node) (bool, []string) { return false, nil } +// checkAndExpandSeriesSet expands expr's UnexpandedSeriesSet into expr's Series. 
+// If the Series field is already non-nil, it's a no-op. func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations.Annotations, error) { switch e := expr.(type) { case *parser.MatrixSelector: @@ -1057,11 +1066,12 @@ type evaluator struct { maxSamples int currentSamples int - logger log.Logger + logger *slog.Logger lookbackDelta time.Duration samplesStats *stats.QuerySamples noStepSubqueryIntervalFn func(rangeMillis int64) int64 enableDelayedNameRemoval bool + querier storage.Querier } // errorf causes a panic with the input formatted into an error. @@ -1087,7 +1097,7 @@ func (ev *evaluator) recover(expr parser.Expr, ws *annotations.Annotations, errp buf := make([]byte, 64<<10) buf = buf[:runtime.Stack(buf, false)] - level.Error(ev.logger).Log("msg", "runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) + ev.logger.Error("runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) *errp = fmt.Errorf("unexpected error: %w", err) case errWithWarnings: *errp = err.err @@ -1223,38 +1233,17 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label ev.currentSamples = tempNumSamples // Gather input vectors for this timestamp. for i := range exprs { - vectors[i] = vectors[i][:0] - + var bh []EvalSeriesHelper + var sh []EvalSeriesHelper if prepSeries != nil { - bufHelpers[i] = bufHelpers[i][:0] - } - - for si, series := range matrixes[i] { - switch { - case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) - // Move input vectors forward so we don't have to re-scan the same - // past points at the next step. - matrixes[i][si].Floats = series.Floats[1:] - case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) - matrixes[i][si].Histograms = series.Histograms[1:] - default: - continue - } - if prepSeries != nil { - bufHelpers[i] = append(bufHelpers[i], seriesHelpers[i][si]) - } - // Don't add histogram size here because we only - // copy the pointer above, not the whole - // histogram. - ev.currentSamples++ - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + bh = bufHelpers[i][:0] + sh = seriesHelpers[i] } + vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) args[i] = vectors[i] - ev.samplesStats.UpdatePeak(ev.currentSamples) + if prepSeries != nil { + bufHelpers[i] = bh + } } // Make the function call. @@ -1455,6 +1444,79 @@ func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.Aggregate return result, warnings } +// evalSeries generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive), with points spaced ev.interval apart, from the given series at the given offset. +// For every storage.Series iterator in series, the method iterates in ev.interval-sized steps from ev.startTimestamp up to and including ev.endTimestamp, +// collecting every corresponding sample (obtained via ev.vectorSelectorSingle) into a Series. +// All of the generated Series are collected into a Matrix that is returned. 
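+// For example, with startTimestamp=0, endTimestamp=60_000 and interval=30_000 (all milliseconds), +// evalSeries(ctx, series, 0, false) visits t=0, t=30_000 and t=60_000, so each input series +// contributes at most three points, with float samples collected into Series.Floats and +// histogram samples into Series.Histograms.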
+func (ev *evaluator) evalSeries(ctx context.Context, series []storage.Series, offset time.Duration, recordOrigT bool) Matrix { + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + + mat := make(Matrix, 0, len(series)) + var prevSS *Series + it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) + var chkIter chunkenc.Iterator + for _, s := range series { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + chkIter = s.Iterator(chkIter) + it.Reset(chkIter) + ss := Series{ + Metric: s.Labels(), + } + + for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { + step++ + origT, f, h, ok := ev.vectorSelectorSingle(it, offset, ts) + if !ok { + continue + } + + if h == nil { + ev.currentSamples++ + ev.samplesStats.IncrementSamplesAtStep(step, 1) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Floats == nil { + ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) + } + if recordOrigT { + // This is an info metric, where we want to track the original sample timestamp. + // Info metric values should be 1 by convention, therefore we can re-use this + // space in the sample. + f = float64(origT) + } + ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) + } else { + if recordOrigT { + ev.error(fmt.Errorf("this should be an info metric, with float samples: %s", ss.Metric)) + } + + point := HPoint{H: h, T: ts} + histSize := point.size() + ev.currentSamples += histSize + ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Histograms == nil { + ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) + } + ss.Histograms = append(ss.Histograms, point) + } + } + + if len(ss.Floats)+len(ss.Histograms) > 0 { + mat = append(mat, ss) + prevSS = &mat[len(mat)-1] + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + return mat +} + // evalSubquery evaluates given SubqueryExpr and returns an equivalent // evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set. 
func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { @@ -1601,6 +1663,8 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, return ev.evalLabelReplace(ctx, e.Args) case "label_join": return ev.evalLabelJoin(ctx, e.Args) + case "info": + return ev.evalInfo(ctx, e.Args) } if !matrixArg { @@ -1742,9 +1806,8 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, ev.samplesStats.UpdatePeak(ev.currentSamples) if e.Func.Name == "rate" || e.Func.Name == "increase" { - samples := inMatrix[0] - metricName := samples.Metric.Get(labels.MetricName) - if metricName != "" && len(samples.Floats) > 0 && + metricName := inMatrix[0].Metric.Get(labels.MetricName) + if metricName != "" && len(ss.Floats) > 0 && !strings.HasSuffix(metricName, "_total") && !strings.HasSuffix(metricName, "_sum") && !strings.HasSuffix(metricName, "_count") && @@ -1821,6 +1884,9 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, for j := range mat[i].Floats { mat[i].Floats[j].F = -mat[i].Floats[j].F } + for j := range mat[i].Histograms { + mat[i].Histograms[j].H = mat[i].Histograms[j].H.Copy().Mul(-1) + } } if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") @@ -1857,20 +1923,20 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, }, e.LHS, e.RHS) default: return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh) + vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case lt == parser.ValueTypeVector && rt == parser.ValueTypeScalar: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) case lt == parser.ValueTypeScalar && rt == parser.ValueTypeVector: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } @@ -1890,56 +1956,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } - mat := make(Matrix, 0, len(e.Series)) - var prevSS *Series - it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) - var chkIter chunkenc.Iterator - for i, s := range e.Series { - if err := contextDone(ctx, "expression evaluation"); err != nil { - ev.error(err) - } - chkIter = s.Iterator(chkIter) - it.Reset(chkIter) - ss := Series{ - Metric: 
e.Series[i].Labels(), - } - - for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { - step++ - _, f, h, ok := ev.vectorSelectorSingle(it, e, ts) - if ok { - if h == nil { - ev.currentSamples++ - ev.samplesStats.IncrementSamplesAtStep(step, 1) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Floats == nil { - ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) - } - ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) - } else { - point := HPoint{H: h, T: ts} - histSize := point.size() - ev.currentSamples += histSize - ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Histograms == nil { - ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) - } - ss.Histograms = append(ss.Histograms, point) - } - } - } - - if len(ss.Floats)+len(ss.Histograms) > 0 { - mat = append(mat, ss) - prevSS = &mat[len(mat)-1] - } - } - ev.samplesStats.UpdatePeak(ev.currentSamples) + mat := ev.evalSeries(ctx, e.Series, e.Offset, false) return mat, ws case *parser.MatrixSelector: @@ -1960,6 +1977,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } if e.Step != 0 { @@ -2004,6 +2022,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples @@ -2104,7 +2123,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + t, _, _, ok := ev.vectorSelectorSingle(it, vs.Offset, enh.Ts) if !ok { continue } @@ -2128,10 +2147,10 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co } // vectorSelectorSingle evaluates an instant vector for the iterator of one time series. -func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, node *parser.VectorSelector, ts int64) ( +func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, offset time.Duration, ts int64) ( int64, float64, *histogram.FloatHistogram, bool, ) { - refTime := ts - durationMilliseconds(node.Offset) + refTime := ts - durationMilliseconds(offset) var t int64 var v float64 var h *histogram.FloatHistogram @@ -2511,7 +2530,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi } // VectorBinop evaluates a binary operation between two Vectors, excluding set operators. 
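// The pos argument introduced below is threaded through to vectorElemBinop so that // annotations about incompatible operand types can point back at the binary // expression's position in the query string.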
-func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { if matching.Card == parser.CardManyToMany { panic("many-to-many only allowed for set operators") } @@ -2585,7 +2604,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * fl, fr = fr, fl hl, hr = hr, hl } - floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr) + floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr, pos) if err != nil { lastErr = err } @@ -2694,7 +2713,7 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V } // VectorscalarBinop evaluates a binary operation between a Vector and a Scalar. -func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { var lastErr error for _, lhsSample := range lhs { lf, rf := lhsSample.F, rhs.V @@ -2706,7 +2725,7 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala lf, rf = rf, lf lh, rh = rh, lh } - float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh) + float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh, pos) if err != nil { lastErr = err } @@ -2773,7 +2792,7 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { } // vectorElemBinop evaluates a binary operation between two Vector elements. 
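// With the change below, previously silent type mismatches now return keep=false // together with an "incompatible types" info annotation anchored at pos: // histogram+float and histogram-float (in either order), histogram*histogram, // float/histogram, and histogram/histogram.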
-func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram) (float64, *histogram.FloatHistogram, bool, error) { +func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram, pos posrange.PositionRange) (float64, *histogram.FloatHistogram, bool, error) { switch op { case parser.ADD: if hlhs != nil && hrhs != nil { @@ -2783,7 +2802,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs + rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs + rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "+", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "+", "histogram", pos) case parser.SUB: if hlhs != nil && hrhs != nil { res, err := hlhs.Copy().Sub(hrhs) @@ -2792,7 +2817,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs - rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs - rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "-", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "-", "histogram", pos) case parser.MUL: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Mul(rhs), true, nil @@ -2800,11 +2831,20 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram if hlhs == nil && hrhs != nil { return 0, hrhs.Copy().Mul(lhs), true, nil } + if hlhs != nil && hrhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "*", "histogram", pos) + } return lhs * rhs, nil, true, nil case parser.DIV: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Div(rhs), true, nil } + if hrhs != nil { + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "/", "histogram", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "/", "histogram", pos) + } return lhs / rhs, nil, true, nil case parser.POW: return math.Pow(lhs, rhs), nil, true, nil @@ -2881,7 +2921,15 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. 
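+ // Setting seen to false means the histogram sample does not seed the group, + // so it is skipped rather than contributing a spurious 0.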
+ group.seen = false + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} @@ -3342,6 +3390,9 @@ func handleVectorBinopError(err error, e *parser.BinaryExpr) annotations.Annotat } metricName := "" pos := e.PositionRange() + if errors.Is(err, annotations.PromQLInfo) || errors.Is(err, annotations.PromQLWarning) { + return annotations.New().Add(err) + } if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) } else if errors.Is(err, histogram.ErrHistogramsIncompatibleBounds) { @@ -3668,3 +3719,41 @@ func newHistogramStatsSeries(series storage.Series) *histogramStatsSeries { func (s histogramStatsSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { return NewHistogramStatsIterator(s.Series.Iterator(it)) } + +// gatherVector gathers a Vector for ts from the series in input. +// output is used as a buffer. +// If bufHelpers and seriesHelpers are provided, seriesHelpers[i] is appended to bufHelpers for every input index i. +// The gathered Vector and bufHelper are returned. +func (ev *evaluator) gatherVector(ts int64, input Matrix, output Vector, bufHelpers, seriesHelpers []EvalSeriesHelper) (Vector, []EvalSeriesHelper) { + output = output[:0] + for i, series := range input { + switch { + case len(series.Floats) > 0 && series.Floats[0].T == ts: + s := series.Floats[0] + output = append(output, Sample{Metric: series.Metric, F: s.F, T: ts, DropName: series.DropName}) + // Move input vectors forward so we don't have to re-scan the same + // past points at the next step. + input[i].Floats = series.Floats[1:] + case len(series.Histograms) > 0 && series.Histograms[0].T == ts: + s := series.Histograms[0] + output = append(output, Sample{Metric: series.Metric, H: s.H, T: ts, DropName: series.DropName}) + input[i].Histograms = series.Histograms[1:] + default: + continue + } + if len(seriesHelpers) > 0 { + bufHelpers = append(bufHelpers, seriesHelpers[i]) + } + + // Don't add histogram size here because we only + // copy the pointer above, not the whole + // histogram. + ev.currentSamples++ + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + + return output, bufHelpers +} diff --git a/promql/engine_internal_test.go b/promql/engine_internal_test.go index cb501b2fdf..0962c218c7 100644 --- a/promql/engine_internal_test.go +++ b/promql/engine_internal_test.go @@ -14,22 +14,21 @@ package promql import ( + "bytes" "errors" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/annotations" ) func TestRecoverEvaluatorRuntime(t *testing.T) { - var output []interface{} - logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { - output = append(output, keyvals...) 
- return nil - })) + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) ev := &evaluator{logger: logger} expr, _ := parser.ParseExpr("sum(up)") @@ -38,7 +37,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { defer func() { require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") - require.Contains(t, output, "sum(up)") + require.Contains(t, output.String(), "sum(up)") }() defer ev.recover(expr, nil, &err) @@ -48,7 +47,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { } func TestRecoverEvaluatorError(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error e := errors.New("custom error") @@ -62,7 +61,7 @@ func TestRecoverEvaluatorError(t *testing.T) { } func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error var ws annotations.Annotations diff --git a/promql/engine_test.go b/promql/engine_test.go index d5daa72af9..7c398029f5 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -17,8 +17,10 @@ import ( "context" "errors" "fmt" + "math" "sort" "strconv" + "strings" "sync" "testing" "time" @@ -28,11 +30,13 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/teststorage" @@ -1467,7 +1471,7 @@ load 10s }, { // Nested subquery. - // Now the outmost subquery produces more samples than inner most rate. + // Now the outermost subquery produces more samples than inner most rate. Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[17s:1s] @ 2000`, MaxSamples: 36, Start: time.Unix(10, 0), @@ -2014,23 +2018,58 @@ func TestSubquerySelector(t *testing.T) { type FakeQueryLogger struct { closed bool logs []interface{} + attrs []any } func NewFakeQueryLogger() *FakeQueryLogger { return &FakeQueryLogger{ closed: false, logs: make([]interface{}, 0), + attrs: make([]any, 0), } } +// It implements the promql.QueryLogger interface. func (f *FakeQueryLogger) Close() error { f.closed = true return nil } -func (f *FakeQueryLogger) Log(l ...interface{}) error { - f.logs = append(f.logs, l...) - return nil +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Info(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Error(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Warn(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) 
+} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Debug(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) With(args ...any) { + f.attrs = append(f.attrs, args...) } func TestQueryLogger_basic(t *testing.T) { @@ -2058,9 +2097,8 @@ func TestQueryLogger_basic(t *testing.T) { f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) queryExec() - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}} { - require.Equal(t, field, f1.logs[i]) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) l := len(f1.logs) queryExec() @@ -2106,11 +2144,8 @@ func TestQueryLogger_fields(t *testing.T) { res := query.Exec(ctx) require.NoError(t, res.Err) - expected := []string{"foo", "bar"} - for i, field := range expected { - v := f1.logs[len(f1.logs)-len(expected)+i].(string) - require.Equal(t, field, v) - } + require.Contains(t, f1.logs, `foo`) + require.Contains(t, f1.logs, `bar`) } func TestQueryLogger_error(t *testing.T) { @@ -2136,9 +2171,10 @@ func TestQueryLogger_error(t *testing.T) { res := query.Exec(ctx) require.Error(t, res.Err, "query should have failed") - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}, "error", testErr} { - require.Equal(t, f1.logs[i], field) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) + require.Contains(t, f1.logs, `error`) + require.Contains(t, f1.logs, testErr) } func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { @@ -3708,3 +3744,187 @@ histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum: }, }) } + +func TestRateAnnotations(t *testing.T) { + testCases := map[string]struct { + data string + expr string + expectedWarningAnnotations []string + expectedInfoAnnotations []string + }{ + "info annotation when two samples are selected": { + data: ` + series 1 2 + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{ + `PromQL info: metric might not be a counter, name does not end in _total/_sum/_count/_bucket: "series" (1:6)`, + }, + }, + "no info annotations when no samples": { + data: ` + series + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when selecting one sample": { + data: ` + series 1 2 + `, + expr: "rate(series[10s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when no samples due to mixed data types": { + data: ` + series{label="a"} 1 {{schema:1 sum:15 count:10 buckets:[1 2 3]}} + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{ + `PromQL warning: encountered a mix of histograms and floats for metric name "series" (1:6)`, + }, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when selecting two native histograms": { + data: ` + series{label="a"} {{schema:1 sum:10 count:5 buckets:[1 2 3]}} {{schema:1 sum:15 count:10 buckets:[1 2 3]}} + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t 
*testing.T) { + store := promqltest.LoadedStorage(t, "load 1m\n"+strings.TrimSpace(testCase.data)) + t.Cleanup(func() { _ = store.Close() }) + + engine := newTestEngine(t) + query, err := engine.NewInstantQuery(context.Background(), store, nil, testCase.expr, timestamp.Time(0).Add(1*time.Minute)) + require.NoError(t, err) + t.Cleanup(query.Close) + + res := query.Exec(context.Background()) + require.NoError(t, res.Err) + + warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0) + testutil.RequireEqual(t, testCase.expectedWarningAnnotations, warnings) + testutil.RequireEqual(t, testCase.expectedInfoAnnotations, infos) + }) + } +} + +func TestHistogramRateWithFloatStaleness(t *testing.T) { + // Make a chunk with two normal histograms of the same value. + h1 := histogram.Histogram{ + Schema: 2, + Count: 10, + Sum: 100, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{100}, + } + + c1 := chunkenc.NewHistogramChunk() + app, err := c1.Appender() + require.NoError(t, err) + var ( + newc chunkenc.Chunk + recoded bool + ) + + newc, recoded, app, err = app.AppendHistogram(nil, 0, h1.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + newc, recoded, _, err = app.AppendHistogram(nil, 10, h1.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + // Make a chunk with a single float stale marker. + c2 := chunkenc.NewXORChunk() + app, err = c2.Appender() + require.NoError(t, err) + + app.Append(20, math.Float64frombits(value.StaleNaN)) + + // Make a chunk with two normal histograms that have zero value. + h2 := histogram.Histogram{ + Schema: 2, + } + + c3 := chunkenc.NewHistogramChunk() + app, err = c3.Appender() + require.NoError(t, err) + + newc, recoded, app, err = app.AppendHistogram(nil, 30, h2.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + newc, recoded, _, err = app.AppendHistogram(nil, 40, h2.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + querier := storage.MockQuerier{ + SelectMockFunction: func(_ bool, _ *storage.SelectHints, _ ...*labels.Matcher) storage.SeriesSet { + return &singleSeriesSet{ + series: mockSeries{chunks: []chunkenc.Chunk{c1, c2, c3}, labelSet: []string{"__name__", "foo"}}, + } + }, + } + + queriable := storage.MockQueryable{MockQuerier: &querier} + + engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) + + q, err := engine.NewInstantQuery(context.Background(), &queriable, nil, "rate(foo[40s])", timestamp.Time(45)) + require.NoError(t, err) + defer q.Close() + + res := q.Exec(context.Background()) + require.NoError(t, res.Err) + + vec, err := res.Vector() + require.NoError(t, err) + + // Single sample result. + require.Len(t, vec, 1) + // The result is a histogram. + require.NotNil(t, vec[0].H) + // The result should be zero as the histogram has not increased, so the rate is zero. 
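+ // In other words, the float stale marker sitting between the two histogram + // chunks must be ignored rather than treated as a float sample mixed into + // the histogram rate.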
+ require.Equal(t, 0.0, vec[0].H.Count) + require.Equal(t, 0.0, vec[0].H.Sum) +} + +type singleSeriesSet struct { + series storage.Series + consumed bool +} + +func (s *singleSeriesSet) Next() bool { c := s.consumed; s.consumed = true; return !c } +func (s singleSeriesSet) At() storage.Series { return s.series } +func (s singleSeriesSet) Err() error { return nil } +func (s singleSeriesSet) Warnings() annotations.Annotations { return nil } + +type mockSeries struct { + chunks []chunkenc.Chunk + labelSet []string +} + +func (s mockSeries) Labels() labels.Labels { + return labels.FromStrings(s.labelSet...) +} + +func (s mockSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { + iterables := []chunkenc.Iterator{} + for _, c := range s.chunks { + iterables = append(iterables, c.Iterator(nil)) + } + return storage.ChainSampleIteratorFromIterators(it, iterables) +} diff --git a/promql/functions.go b/promql/functions.go index 182b69b080..e93a4cdc5b 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -350,7 +350,7 @@ func calcTrendValue(i int, tf, s0, s1, b float64) float64 { // data. A lower smoothing factor increases the influence of historical data. The trend factor (0 < tf < 1) affects // how trends in historical data will affect the current data. A higher trend factor increases the influence // of trends. Algorithm taken from https://en.wikipedia.org/wiki/Exponential_smoothing titled: "Double exponential smoothing". -func funcHoltWinters(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { +func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { samples := vals[0].(Matrix)[0] // The smoothing factor argument. @@ -415,22 +415,12 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(a.Metric, b.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -442,7 +432,8 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return +1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -450,22 +441,12 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) 
(Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(b.Metric, a.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -477,7 +458,8 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return -1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return -labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -551,6 +533,10 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper toNearestInverse := 1.0 / toNearest for _, el := range vec { + if el.H != nil { + // Process only float samples. + continue + } f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse enh.Out = append(enh.Out, Sample{ Metric: el.Metric, @@ -1480,7 +1466,7 @@ func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressio regexStr = stringFromArg(args[4]) ) - regex, err := regexp.Compile("^(?:" + regexStr + ")$") + regex, err := regexp.Compile("^(?s:" + regexStr + ")$") if err != nil { panic(fmt.Errorf("invalid regular expression in label_replace(): %s", regexStr)) } @@ -1514,11 +1500,6 @@ func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressio return matrix, ws } -// === label_replace(Vector parser.ValueTypeVector, dst_label, replacement, src_labelname, regex parser.ValueTypeString) (Vector, Annotations) === -func funcLabelReplace(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // === Vector(s Scalar) (Vector, Annotations) === func funcVector(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, @@ -1570,11 +1551,6 @@ func (ev *evaluator) evalLabelJoin(ctx context.Context, args parser.Expressions) return matrix, ws } -// === label_join(vector model.ValVector, dest_labelname, separator, src_labelname...) (Vector, Annotations) === -func funcLabelJoin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // Common code for date related functions. func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) float64) Vector { if len(vals) == 0 { @@ -1657,82 +1633,83 @@ func funcYear(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) // FunctionCalls is a list of all functions supported by PromQL, including their types. 
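// Entries mapped to nil below (info, label_replace, label_join) are not dispatched // through this map; they are handled by dedicated evaluator methods // (evalInfo, evalLabelReplace, evalLabelJoin) invoked directly from evaluator.eval.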
var FunctionCalls = map[string]FunctionCall{ - "abs": funcAbs, - "absent": funcAbsent, - "absent_over_time": funcAbsentOverTime, - "acos": funcAcos, - "acosh": funcAcosh, - "asin": funcAsin, - "asinh": funcAsinh, - "atan": funcAtan, - "atanh": funcAtanh, - "avg_over_time": funcAvgOverTime, - "ceil": funcCeil, - "changes": funcChanges, - "clamp": funcClamp, - "clamp_max": funcClampMax, - "clamp_min": funcClampMin, - "cos": funcCos, - "cosh": funcCosh, - "count_over_time": funcCountOverTime, - "days_in_month": funcDaysInMonth, - "day_of_month": funcDayOfMonth, - "day_of_week": funcDayOfWeek, - "day_of_year": funcDayOfYear, - "deg": funcDeg, - "delta": funcDelta, - "deriv": funcDeriv, - "exp": funcExp, - "floor": funcFloor, - "histogram_avg": funcHistogramAvg, - "histogram_count": funcHistogramCount, - "histogram_fraction": funcHistogramFraction, - "histogram_quantile": funcHistogramQuantile, - "histogram_sum": funcHistogramSum, - "histogram_stddev": funcHistogramStdDev, - "histogram_stdvar": funcHistogramStdVar, - "holt_winters": funcHoltWinters, - "hour": funcHour, - "idelta": funcIdelta, - "increase": funcIncrease, - "irate": funcIrate, - "label_replace": funcLabelReplace, - "label_join": funcLabelJoin, - "ln": funcLn, - "log10": funcLog10, - "log2": funcLog2, - "last_over_time": funcLastOverTime, - "mad_over_time": funcMadOverTime, - "max_over_time": funcMaxOverTime, - "min_over_time": funcMinOverTime, - "minute": funcMinute, - "month": funcMonth, - "pi": funcPi, - "predict_linear": funcPredictLinear, - "present_over_time": funcPresentOverTime, - "quantile_over_time": funcQuantileOverTime, - "rad": funcRad, - "rate": funcRate, - "resets": funcResets, - "round": funcRound, - "scalar": funcScalar, - "sgn": funcSgn, - "sin": funcSin, - "sinh": funcSinh, - "sort": funcSort, - "sort_desc": funcSortDesc, - "sort_by_label": funcSortByLabel, - "sort_by_label_desc": funcSortByLabelDesc, - "sqrt": funcSqrt, - "stddev_over_time": funcStddevOverTime, - "stdvar_over_time": funcStdvarOverTime, - "sum_over_time": funcSumOverTime, - "tan": funcTan, - "tanh": funcTanh, - "time": funcTime, - "timestamp": funcTimestamp, - "vector": funcVector, - "year": funcYear, + "abs": funcAbs, + "absent": funcAbsent, + "absent_over_time": funcAbsentOverTime, + "acos": funcAcos, + "acosh": funcAcosh, + "asin": funcAsin, + "asinh": funcAsinh, + "atan": funcAtan, + "atanh": funcAtanh, + "avg_over_time": funcAvgOverTime, + "ceil": funcCeil, + "changes": funcChanges, + "clamp": funcClamp, + "clamp_max": funcClampMax, + "clamp_min": funcClampMin, + "cos": funcCos, + "cosh": funcCosh, + "count_over_time": funcCountOverTime, + "days_in_month": funcDaysInMonth, + "day_of_month": funcDayOfMonth, + "day_of_week": funcDayOfWeek, + "day_of_year": funcDayOfYear, + "deg": funcDeg, + "delta": funcDelta, + "deriv": funcDeriv, + "exp": funcExp, + "floor": funcFloor, + "histogram_avg": funcHistogramAvg, + "histogram_count": funcHistogramCount, + "histogram_fraction": funcHistogramFraction, + "histogram_quantile": funcHistogramQuantile, + "histogram_sum": funcHistogramSum, + "histogram_stddev": funcHistogramStdDev, + "histogram_stdvar": funcHistogramStdVar, + "double_exponential_smoothing": funcDoubleExponentialSmoothing, + "hour": funcHour, + "idelta": funcIdelta, + "increase": funcIncrease, + "info": nil, + "irate": funcIrate, + "label_replace": nil, // evalLabelReplace not called via this map. + "label_join": nil, // evalLabelJoin not called via this map. 
+ "ln": funcLn, + "log10": funcLog10, + "log2": funcLog2, + "last_over_time": funcLastOverTime, + "mad_over_time": funcMadOverTime, + "max_over_time": funcMaxOverTime, + "min_over_time": funcMinOverTime, + "minute": funcMinute, + "month": funcMonth, + "pi": funcPi, + "predict_linear": funcPredictLinear, + "present_over_time": funcPresentOverTime, + "quantile_over_time": funcQuantileOverTime, + "rad": funcRad, + "rate": funcRate, + "resets": funcResets, + "round": funcRound, + "scalar": funcScalar, + "sgn": funcSgn, + "sin": funcSin, + "sinh": funcSinh, + "sort": funcSort, + "sort_desc": funcSortDesc, + "sort_by_label": funcSortByLabel, + "sort_by_label_desc": funcSortByLabelDesc, + "sqrt": funcSqrt, + "stddev_over_time": funcStddevOverTime, + "stdvar_over_time": funcStdvarOverTime, + "sum_over_time": funcSumOverTime, + "tan": funcTan, + "tanh": funcTanh, + "time": funcTime, + "timestamp": funcTimestamp, + "vector": funcVector, + "year": funcYear, } // AtModifierUnsafeFunctions are the functions whose result diff --git a/promql/fuzz.go b/promql/fuzz.go index 5f08e6a72c..759055fb0d 100644 --- a/promql/fuzz.go +++ b/promql/fuzz.go @@ -61,17 +61,13 @@ const ( var symbolTable = labels.NewSymbolTable() func fuzzParseMetricWithContentType(in []byte, contentType string) int { - p, warning := textparse.New(in, contentType, false, symbolTable) - if warning != nil { + p, warning := textparse.New(in, contentType, "", false, false, symbolTable) + if p == nil || warning != nil { // An invalid content type is being passed, which should not happen // in this context. panic(warning) } - if contentType == "application/openmetrics-text" { - p = textparse.NewOpenMetricsParser(in, symbolTable) - } - var err error for { _, err = p.Next() @@ -95,7 +91,7 @@ func fuzzParseMetricWithContentType(in []byte, contentType string) int { // Note that this is not the parser for the text-based exposition-format; that // lives in github.com/prometheus/client_golang/text. func FuzzParseMetric(in []byte) int { - return fuzzParseMetricWithContentType(in, "") + return fuzzParseMetricWithContentType(in, "text/plain") } func FuzzParseOpenMetric(in []byte) int { diff --git a/promql/fuzz_test.go b/promql/fuzz_test.go index 1f0bbaa662..4a26798ded 100644 --- a/promql/fuzz_test.go +++ b/promql/fuzz_test.go @@ -29,7 +29,7 @@ func TestfuzzParseMetricWithContentTypePanicOnInvalid(t *testing.T) { } else { err, ok := p.(error) require.True(t, ok) - require.Contains(t, err.Error(), "duplicate parameter name") + require.ErrorContains(t, err, "duplicate parameter name") } }() diff --git a/promql/info.go b/promql/info.go new file mode 100644 index 0000000000..1a9f7eb18e --- /dev/null +++ b/promql/info.go @@ -0,0 +1,454 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package promql
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"slices"
+	"strings"
+
+	"github.com/grafana/regexp"
+
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/promql/parser"
+	"github.com/prometheus/prometheus/storage"
+	"github.com/prometheus/prometheus/util/annotations"
+)
+
+const targetInfo = "target_info"
+
+// identifyingLabels are the labels we consider as identifying for info metrics.
+// Currently hard-coded, so we don't need knowledge of individual info metrics.
+var identifyingLabels = []string{"instance", "job"}
+
+// evalInfo implements the info PromQL function.
+func (ev *evaluator) evalInfo(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) {
+	val, annots := ev.eval(ctx, args[0])
+	mat := val.(Matrix)
+	// Map from data label name to matchers.
+	dataLabelMatchers := map[string][]*labels.Matcher{}
+	var infoNameMatchers []*labels.Matcher
+	if len(args) > 1 {
+		// TODO: Introduce a dedicated LabelSelector type.
+		labelSelector := args[1].(*parser.VectorSelector)
+		for _, m := range labelSelector.LabelMatchers {
+			dataLabelMatchers[m.Name] = append(dataLabelMatchers[m.Name], m)
+			if m.Name == labels.MetricName {
+				infoNameMatchers = append(infoNameMatchers, m)
+			}
+		}
+	} else {
+		infoNameMatchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}
+	}
+
+	// Don't try to enrich info series.
+	ignoreSeries := map[int]struct{}{}
+loop:
+	for i, s := range mat {
+		name := s.Metric.Get(labels.MetricName)
+		for _, m := range infoNameMatchers {
+			if m.Matches(name) {
+				ignoreSeries[i] = struct{}{}
+				continue loop
+			}
+		}
+	}
+
+	selectHints := ev.infoSelectHints(args[0])
+	infoSeries, ws, err := ev.fetchInfoSeries(ctx, mat, ignoreSeries, dataLabelMatchers, selectHints)
+	if err != nil {
+		ev.error(err)
+	}
+	annots.Merge(ws)
+
+	res, ws := ev.combineWithInfoSeries(ctx, mat, infoSeries, ignoreSeries, dataLabelMatchers)
+	annots.Merge(ws)
+	return res, annots
+}
+
+// infoSelectHints calculates the storage.SelectHints for selecting info series, given expr (first argument to info call).
+func (ev *evaluator) infoSelectHints(expr parser.Expr) storage.SelectHints {
+	var nodeTimestamp *int64
+	var offset int64
+	parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
+		switch n := node.(type) {
+		case *parser.VectorSelector:
+			if n.Timestamp != nil {
+				nodeTimestamp = n.Timestamp
+			}
+			offset = durationMilliseconds(n.OriginalOffset)
+			return fmt.Errorf("end traversal")
+		default:
+			return nil
+		}
+	})
+
+	start := ev.startTimestamp
+	end := ev.endTimestamp
+	if nodeTimestamp != nil {
+		// The timestamp on the selector overrides everything.
+		start = *nodeTimestamp
+		end = *nodeTimestamp
+	}
+	// Reduce the start by one fewer ms than the lookback delta
+	// because we want to exclude samples that are precisely the
+	// lookback delta before the eval time.
+	start -= durationMilliseconds(ev.lookbackDelta) - 1
+	start -= offset
+	end -= offset
+
+	return storage.SelectHints{
+		Start: start,
+		End:   end,
+		Step:  ev.interval,
+		Func:  "info",
+	}
+}
+
+// fetchInfoSeries fetches info series given matching identifying labels in mat.
+// Series in ignoreSeries are not fetched.
+// dataLabelMatchers may be mutated.
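+// For example, base series labeled {instance="a", job="1"} and {instance="b", job="1"} produce the
+// info series matchers {instance=~"a|b", job=~"1"}: per identifying label, the observed values are
+// regexp-quoted and OR-ed together. Unless dataLabelMatchers contains a __name__ matcher, the
+// selection additionally defaults to __name__="target_info".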
+func (ev *evaluator) fetchInfoSeries(ctx context.Context, mat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher, selectHints storage.SelectHints) (Matrix, annotations.Annotations, error) { + // A map of values for all identifying labels we are interested in. + idLblValues := map[string]map[string]struct{}{} + for i, s := range mat { + if _, exists := ignoreSeries[i]; exists { + continue + } + + // Register relevant values per identifying label for this series. + for _, l := range identifyingLabels { + val := s.Metric.Get(l) + if val == "" { + continue + } + + if idLblValues[l] == nil { + idLblValues[l] = map[string]struct{}{} + } + idLblValues[l][val] = struct{}{} + } + } + if len(idLblValues) == 0 { + return nil, nil, nil + } + + // Generate regexps for every interesting value per identifying label. + var sb strings.Builder + idLblRegexps := make(map[string]string, len(idLblValues)) + for name, vals := range idLblValues { + sb.Reset() + i := 0 + for v := range vals { + if i > 0 { + sb.WriteRune('|') + } + sb.WriteString(regexp.QuoteMeta(v)) + i++ + } + idLblRegexps[name] = sb.String() + } + + var infoLabelMatchers []*labels.Matcher + for name, re := range idLblRegexps { + infoLabelMatchers = append(infoLabelMatchers, labels.MustNewMatcher(labels.MatchRegexp, name, re)) + } + var nameMatcher *labels.Matcher + for name, ms := range dataLabelMatchers { + for i, m := range ms { + if m.Name == labels.MetricName { + nameMatcher = m + ms = slices.Delete(ms, i, i+1) + } + infoLabelMatchers = append(infoLabelMatchers, m) + } + if len(ms) > 0 { + dataLabelMatchers[name] = ms + } else { + delete(dataLabelMatchers, name) + } + } + if nameMatcher == nil { + // Default to using the target_info metric. + infoLabelMatchers = append([]*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}, infoLabelMatchers...) + } + + infoIt := ev.querier.Select(ctx, false, &selectHints, infoLabelMatchers...) + infoSeries, ws, err := expandSeriesSet(ctx, infoIt) + if err != nil { + return nil, ws, err + } + + infoMat := ev.evalSeries(ctx, infoSeries, 0, true) + return infoMat, ws, nil +} + +// combineWithInfoSeries combines mat with select data labels from infoMat. +func (ev *evaluator) combineWithInfoSeries(ctx context.Context, mat, infoMat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher) (Matrix, annotations.Annotations) { + buf := make([]byte, 0, 1024) + lb := labels.NewScratchBuilder(0) + sigFunction := func(name string) func(labels.Labels) string { + return func(lset labels.Labels) string { + lb.Reset() + lb.Add(labels.MetricName, name) + lset.MatchLabels(true, identifyingLabels...).Range(func(l labels.Label) { + lb.Add(l.Name, l.Value) + }) + lb.Sort() + return string(lb.Labels().Bytes(buf)) + } + } + + infoMetrics := map[string]struct{}{} + for _, is := range infoMat { + lblMap := is.Metric.Map() + infoMetrics[lblMap[labels.MetricName]] = struct{}{} + } + sigfs := make(map[string]func(labels.Labels) string, len(infoMetrics)) + for name := range infoMetrics { + sigfs[name] = sigFunction(name) + } + + // Keep a copy of the original point slices so they can be returned to the pool. 
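+	// They are handed back to the pool via putFPointSlice/putHPointSlice once the
+	// output matrix has been assembled below.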
+ origMatrices := []Matrix{ + make(Matrix, len(mat)), + make(Matrix, len(infoMat)), + } + copy(origMatrices[0], mat) + copy(origMatrices[1], infoMat) + + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + originalNumSamples := ev.currentSamples + + // Create an output vector that is as big as the input matrix with + // the most time series. + biggestLen := max(len(mat), len(infoMat)) + baseVector := make(Vector, 0, len(mat)) + infoVector := make(Vector, 0, len(infoMat)) + enh := &EvalNodeHelper{ + Out: make(Vector, 0, biggestLen), + } + type seriesAndTimestamp struct { + Series + ts int64 + } + seriess := make(map[uint64]seriesAndTimestamp, biggestLen) // Output series by series hash. + tempNumSamples := ev.currentSamples + + // For every base series, compute signature per info metric. + baseSigs := make([]map[string]string, 0, len(mat)) + for _, s := range mat { + sigs := make(map[string]string, len(infoMetrics)) + for infoName := range infoMetrics { + sigs[infoName] = sigfs[infoName](s.Metric) + } + baseSigs = append(baseSigs, sigs) + } + + infoSigs := make([]string, 0, len(infoMat)) + for _, s := range infoMat { + name := s.Metric.Map()[labels.MetricName] + infoSigs = append(infoSigs, sigfs[name](s.Metric)) + } + + var warnings annotations.Annotations + for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + // Reset number of samples in memory after each timestamp. + ev.currentSamples = tempNumSamples + // Gather input vectors for this timestamp. + baseVector, _ = ev.gatherVector(ts, mat, baseVector, nil, nil) + infoVector, _ = ev.gatherVector(ts, infoMat, infoVector, nil, nil) + + enh.Ts = ts + result, err := ev.combineWithInfoVector(baseVector, infoVector, ignoreSeries, baseSigs, infoSigs, enh, dataLabelMatchers) + if err != nil { + ev.error(err) + } + enh.Out = result[:0] // Reuse result vector. + + vecNumSamples := result.TotalSamples() + ev.currentSamples += vecNumSamples + // When we reset currentSamples to tempNumSamples during the next iteration of the loop it also + // needs to include the samples from the result here, as they're still in memory. + tempNumSamples += vecNumSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + + // Add samples in result vector to output series. + for _, sample := range result { + h := sample.Metric.Hash() + ss, exists := seriess[h] + if exists { + if ss.ts == ts { // If we've seen this output series before at this timestamp, it's a duplicate. + ev.errorf("vector cannot contain metrics with the same labelset") + } + ss.ts = ts + } else { + ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts} + } + addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps) + seriess[h] = ss + } + } + + // Reuse the original point slices. + for _, m := range origMatrices { + for _, s := range m { + putFPointSlice(s.Floats) + putHPointSlice(s.Histograms) + } + } + // Assemble the output matrix. By the time we get here we know we don't have too many samples. + numSamples := 0 + output := make(Matrix, 0, len(seriess)) + for _, ss := range seriess { + numSamples += len(ss.Floats) + totalHPointSize(ss.Histograms) + output = append(output, ss.Series) + } + ev.currentSamples = originalNumSamples + numSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + return output, warnings +} + +// combineWithInfoVector combines base and info Vectors. 
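+// For each base sample, the info sample sharing its identifying-label signature (computed per
+// info metric name) contributes its data labels to the output sample.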
+// Base series in ignoreSeries are not combined. +func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[int]struct{}, baseSigs []map[string]string, infoSigs []string, enh *EvalNodeHelper, dataLabelMatchers map[string][]*labels.Matcher) (Vector, error) { + if len(base) == 0 { + return nil, nil // Short-circuit: nothing is going to match. + } + + // All samples from the info Vector hashed by the matching label/values. + if enh.rightSigs == nil { + enh.rightSigs = make(map[string]Sample, len(enh.Out)) + } else { + clear(enh.rightSigs) + } + + for i, s := range info { + if s.H != nil { + ev.error(errors.New("info sample should be float")) + } + // We encode original info sample timestamps via the float value. + origT := int64(s.F) + + sig := infoSigs[i] + if existing, exists := enh.rightSigs[sig]; exists { + // We encode original info sample timestamps via the float value. + existingOrigT := int64(existing.F) + switch { + case existingOrigT > origT: + // Keep the other info sample, since it's newer. + case existingOrigT < origT: + // Keep this info sample, since it's newer. + enh.rightSigs[sig] = s + default: + // The two info samples have the same timestamp - conflict. + name := s.Metric.Map()[labels.MetricName] + ev.errorf("found duplicate series for info metric %s", name) + } + } else { + enh.rightSigs[sig] = s + } + } + + for i, bs := range base { + if _, exists := ignoreSeries[i]; exists { + // This series should not be enriched with info metric data labels. + enh.Out = append(enh.Out, Sample{ + Metric: bs.Metric, + F: bs.F, + H: bs.H, + }) + continue + } + + baseLabels := bs.Metric.Map() + enh.resetBuilder(labels.Labels{}) + + // For every info metric name, try to find an info series with the same signature. + seenInfoMetrics := map[string]struct{}{} + for infoName, sig := range baseSigs[i] { + is, exists := enh.rightSigs[sig] + if !exists { + continue + } + if _, exists := seenInfoMetrics[infoName]; exists { + continue + } + + err := is.Metric.Validate(func(l labels.Label) error { + if l.Name == labels.MetricName { + return nil + } + if _, exists := dataLabelMatchers[l.Name]; len(dataLabelMatchers) > 0 && !exists { + // Not among the specified data label matchers. + return nil + } + + if v := enh.lb.Get(l.Name); v != "" && v != l.Value { + return fmt.Errorf("conflicting label: %s", l.Name) + } + if _, exists := baseLabels[l.Name]; exists { + // Skip labels already on the base metric. + return nil + } + + enh.lb.Set(l.Name, l.Value) + return nil + }) + if err != nil { + return nil, err + } + seenInfoMetrics[infoName] = struct{}{} + } + + infoLbls := enh.lb.Labels() + if infoLbls.Len() == 0 { + // If there's at least one data label matcher not matching the empty string, + // we have to ignore this series as there are no matching info series. 
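+			// (A matcher accepting the empty string is trivially satisfied by an absent label.)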
+			allMatchersMatchEmpty := true
+			for _, ms := range dataLabelMatchers {
+				for _, m := range ms {
+					if !m.Matches("") {
+						allMatchersMatchEmpty = false
+						break
+					}
+				}
+			}
+			if !allMatchersMatchEmpty {
+				continue
+			}
+		}
+
+		enh.resetBuilder(bs.Metric)
+		infoLbls.Range(func(l labels.Label) {
+			enh.lb.Set(l.Name, l.Value)
+		})
+
+		enh.Out = append(enh.Out, Sample{
+			Metric: enh.lb.Labels(),
+			F:      bs.F,
+			H:      bs.H,
+		})
+	}
+	return enh.Out, nil
+}
diff --git a/promql/info_test.go b/promql/info_test.go
new file mode 100644
index 0000000000..2e7a67172f
--- /dev/null
+++ b/promql/info_test.go
@@ -0,0 +1,140 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package promql_test
+
+import (
+	"testing"
+
+	"github.com/prometheus/prometheus/promql/promqltest"
+)
+
+// The "info" function is experimental. This is why we write these tests here for now instead of promqltest/testdata/info.test.
+func TestInfo(t *testing.T) {
+	engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery)
+	promqltest.RunTest(t, `
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2
+  metric_with_overlapping_label{instance="a", job="1", label="value", data="base"} 0 1 2
+  target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 1
+  build_info{instance="a", job="1", build_data="build"} 1 1 1
+
+# Include one info metric data label.
+eval range from 0m to 10m step 5m info(metric, {data=~".+"})
+  metric{data="info", instance="a", job="1", label="value"} 0 1 2
+
+# Include all info metric data labels.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Try including all info metric data labels, but with non-matching identifying labels.
+eval range from 0m to 10m step 5m info(metric_not_matching_target_info)
+  metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2
+
+# Try including a certain info metric data label with a non-matching matcher not accepting empty labels.
+# The metric is ignored, due to there being a data label matcher not matching empty labels,
+# and there being no matching info series.
+eval range from 0m to 10m step 5m info(metric, {non_existent=~".+"})
+
+# Include a certain info metric data label together with a non-matching matcher accepting empty labels.
+# Since the non_existent matcher matches empty labels, it's simply ignored when there's no match.
+# XXX: This case has to include a matcher not matching empty labels, due to the PromQL limitation
+# that vector selectors have to contain at least one matcher not accepting empty labels.
+# We might need a construct other than a vector selector to get around this limitation.
+eval range from 0m to 10m step 5m info(metric, {data=~".+", non_existent=~".*"})
+  metric{data="info", instance="a", job="1", label="value"} 0 1 2
+
+# Info series data labels overlapping with those of base series are ignored.
+eval range from 0m to 10m step 5m info(metric_with_overlapping_label)
+  metric_with_overlapping_label{data="base", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Include data labels from target_info specifically.
+eval range from 0m to 10m step 5m info(metric, {__name__="target_info"})
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Try to include all data labels from a non-existent info metric.
+eval range from 0m to 10m step 5m info(metric, {__name__="non_existent"})
+  metric{instance="a", job="1", label="value"} 0 1 2
+
+# Try to include a certain data label from a non-existent info metric.
+eval range from 0m to 10m step 5m info(metric, {__name__="non_existent", data=~".+"})
+
+# Include data labels from build_info.
+eval range from 0m to 10m step 5m info(metric, {__name__="build_info"})
+  metric{instance="a", job="1", label="value", build_data="build"} 0 1 2
+
+# Include data labels from build_info and target_info.
+eval range from 0m to 10m step 5m info(metric, {__name__=~".+_info"})
+  metric{instance="a", job="1", label="value", build_data="build", data="info", another_data="another info"} 0 1 2
+
+# Info metrics themselves are ignored when it comes to enriching with info metric data labels.
+eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", build_data=~".+"})
+  build_info{instance="a", job="1", build_data="build"} 1 1 1
+
+clear
+
+# Overlapping target_info series.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 _
+  target_info{instance="a", job="1", data="updated info", another_data="another info"} _ _ 1
+
+# Conflicting info series are resolved by picking the latest sample.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 _
+  metric{data="updated info", instance="a", job="1", label="value", another_data="another info"} _ _ 2
+
+clear
+
+# Non-overlapping target_info series.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  target_info{instance="a", job="1", data="info"} 1 1 stale
+  target_info{instance="a", job="1", data="updated info"} _ _ 1
+
+# Include info metric data labels from a metric whose data labels change over time.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value"} 0 1 _
+  metric{data="updated info", instance="a", job="1", label="value"} _ _ 2
+
+clear
+
+# The info series selector matches histogram series, but info metrics should be of float type.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  histogram{instance="a", job="1"} {{schema:1 sum:3 count:22 buckets:[5 10 7]}}
+
+eval_fail range from 0m to 10m step 5m info(metric, {__name__="histogram"})
+
+clear
+
+# Series with a skipped scrape.
+load 1m
+  metric{instance="a", job="1", label="value"} 0 _ 2 3 4
+  target_info{instance="a", job="1", data="info"} 1 _ 1 1 1
+
+# Lookback also works for the info series.
+eval range from 1m to 4m step 1m info(metric)
+  metric{data="info", instance="a", job="1", label="value"} 0 2 3 4
+
+# The @ operator also works with info.
+# Note that we pick the timestamp missing a sample, lookback should pick previous sample. +eval range from 1m to 4m step 1m info(metric @ 60) + metric{data="info", instance="a", job="1", label="value"} 0 0 0 0 + +# offset operator works also with info. +eval range from 1m to 4m step 1m info(metric offset 1m) + metric{data="info", instance="a", job="1", label="value"} 0 0 2 3 +`, engine) +} diff --git a/promql/parser/ast.go b/promql/parser/ast.go index 162d7817ab..132ef3f0d2 100644 --- a/promql/parser/ast.go +++ b/promql/parser/ast.go @@ -208,6 +208,10 @@ type VectorSelector struct { UnexpandedSeriesSet storage.SeriesSet Series []storage.Series + // BypassEmptyMatcherCheck is true when the VectorSelector isn't required to have at least one matcher matching the empty string. + // This is the case when VectorSelector is used to represent the info function's second argument. + BypassEmptyMatcherCheck bool + PosRange posrange.PositionRange } diff --git a/promql/parser/functions.go b/promql/parser/functions.go index 99b41321fe..aa65aca275 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -202,10 +202,11 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeScalar, ValueTypeVector}, ReturnType: ValueTypeVector, }, - "holt_winters": { - Name: "holt_winters", - ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, - ReturnType: ValueTypeVector, + "double_exponential_smoothing": { + Name: "double_exponential_smoothing", + ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, + ReturnType: ValueTypeVector, + Experimental: true, }, "hour": { Name: "hour", @@ -223,6 +224,13 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeMatrix}, ReturnType: ValueTypeVector, }, + "info": { + Name: "info", + ArgTypes: []ValueType{ValueTypeVector, ValueTypeVector}, + ReturnType: ValueTypeVector, + Experimental: true, + Variadic: 1, + }, "irate": { Name: "irate", ArgTypes: []ValueType{ValueTypeMatrix}, diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index da24be0c44..befb9bdf3e 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -818,12 +818,12 @@ histogram_desc_item $$ = yylex.(*parser).newMap() $$["sum"] = $3 } - | COUNT_DESC COLON number + | COUNT_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["count"] = $3 } - | ZERO_BUCKET_DESC COLON number + | ZERO_BUCKET_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["z_bucket"] = $3 @@ -875,11 +875,11 @@ bucket_set : LEFT_BRACKET bucket_set_list SPACE RIGHT_BRACKET } ; -bucket_set_list : bucket_set_list SPACE number +bucket_set_list : bucket_set_list SPACE signed_or_unsigned_number { $$ = append($1, $3) } - | number + | signed_or_unsigned_number { $$ = []float64{$1} } diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 22231f73e2..ad58a52976 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -410,55 +410,55 @@ const yyPrivate = 57344 const yyLast = 799 var yyAct = [...]int16{ - 155, 334, 332, 276, 339, 152, 226, 39, 192, 44, - 291, 290, 156, 118, 82, 178, 229, 107, 106, 346, - 347, 348, 349, 109, 108, 198, 239, 199, 133, 110, - 105, 60, 245, 121, 6, 329, 325, 111, 328, 228, - 200, 201, 160, 119, 304, 267, 293, 128, 260, 160, - 151, 261, 159, 302, 358, 311, 122, 55, 89, 159, - 196, 241, 242, 259, 113, 243, 114, 54, 98, 99, - 302, 112, 101, 256, 104, 88, 230, 232, 234, 235, + 
152, 334, 332, 155, 339, 226, 39, 192, 276, 44, + 291, 290, 118, 82, 178, 229, 107, 106, 346, 347, + 348, 349, 109, 108, 198, 239, 199, 156, 110, 105, + 6, 245, 200, 201, 133, 325, 111, 329, 228, 60, + 357, 293, 328, 304, 267, 160, 266, 128, 55, 151, + 302, 311, 302, 196, 340, 159, 55, 89, 54, 356, + 241, 242, 355, 113, 243, 114, 54, 98, 99, 265, + 112, 101, 256, 104, 88, 230, 232, 234, 235, 236, + 244, 246, 249, 250, 251, 252, 253, 257, 258, 105, + 333, 231, 233, 237, 238, 240, 247, 248, 103, 115, + 109, 254, 255, 324, 150, 218, 110, 264, 111, 270, + 77, 35, 7, 149, 188, 163, 322, 321, 173, 320, + 167, 170, 323, 165, 271, 166, 2, 3, 4, 5, + 263, 101, 194, 104, 180, 184, 197, 187, 186, 319, + 272, 202, 203, 204, 205, 206, 207, 208, 209, 210, + 211, 212, 213, 214, 215, 216, 195, 299, 103, 318, + 217, 36, 298, 1, 190, 219, 220, 317, 160, 160, + 316, 193, 160, 154, 182, 196, 229, 297, 159, 159, + 160, 358, 159, 268, 181, 183, 239, 260, 296, 262, + 159, 315, 245, 129, 314, 55, 225, 313, 161, 228, + 161, 161, 259, 312, 161, 54, 86, 295, 310, 288, + 289, 8, 161, 292, 162, 37, 162, 162, 49, 269, + 162, 241, 242, 309, 179, 243, 180, 127, 162, 126, + 308, 223, 294, 256, 48, 222, 230, 232, 234, 235, 236, 244, 246, 249, 250, 251, 252, 253, 257, 258, - 160, 115, 231, 233, 237, 238, 240, 247, 248, 103, - 159, 109, 254, 255, 324, 150, 357, 110, 333, 218, - 111, 340, 310, 149, 77, 163, 7, 105, 35, 173, - 167, 170, 161, 323, 165, 356, 166, 309, 355, 194, - 2, 3, 4, 5, 308, 322, 184, 197, 162, 186, - 321, 195, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 229, 129, 101, - 217, 104, 219, 220, 190, 266, 270, 239, 160, 121, - 268, 193, 264, 245, 55, 196, 154, 225, 159, 119, - 228, 271, 188, 160, 54, 161, 103, 117, 265, 84, - 262, 299, 122, 159, 320, 263, 298, 272, 10, 83, - 161, 162, 241, 242, 269, 187, 243, 185, 79, 288, - 289, 297, 319, 292, 256, 161, 162, 230, 232, 234, - 235, 236, 244, 246, 249, 250, 251, 252, 253, 257, - 258, 162, 294, 231, 233, 237, 238, 240, 247, 248, - 318, 317, 316, 254, 255, 180, 315, 134, 135, 136, - 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, - 147, 148, 157, 158, 169, 105, 314, 296, 300, 301, - 303, 223, 305, 313, 55, 222, 179, 168, 180, 84, - 306, 307, 177, 125, 54, 182, 295, 176, 124, 83, - 221, 312, 87, 89, 8, 181, 183, 81, 37, 86, - 175, 123, 36, 98, 99, 326, 327, 101, 102, 104, - 88, 127, 331, 126, 50, 336, 337, 338, 182, 335, - 78, 1, 342, 341, 344, 343, 49, 48, 181, 183, - 350, 351, 47, 55, 103, 352, 53, 77, 164, 56, - 46, 354, 22, 54, 59, 55, 172, 9, 9, 57, - 132, 45, 43, 130, 171, 54, 359, 42, 131, 41, - 40, 51, 191, 353, 273, 75, 85, 189, 224, 80, - 345, 18, 19, 120, 153, 20, 58, 227, 52, 116, + 221, 169, 231, 233, 237, 238, 240, 247, 248, 157, + 158, 164, 254, 255, 168, 10, 182, 300, 55, 301, + 303, 47, 305, 46, 132, 79, 181, 183, 54, 306, + 307, 45, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 43, 59, 50, + 84, 9, 9, 121, 326, 78, 327, 130, 171, 121, + 83, 42, 131, 119, 335, 336, 337, 331, 185, 119, + 338, 261, 342, 341, 344, 343, 122, 117, 41, 177, + 350, 351, 122, 55, 176, 352, 53, 77, 40, 56, + 125, 354, 22, 54, 84, 124, 172, 175, 51, 57, + 191, 353, 273, 85, 83, 189, 359, 224, 123, 80, + 345, 120, 81, 153, 58, 75, 227, 52, 116, 0, + 0, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 0, 0, 0, 13, 0, 0, 0, 24, 0, 30, - 0, 0, 31, 32, 55, 38, 0, 53, 77, 0, + 0, 0, 31, 32, 55, 38, 105, 53, 77, 0, 56, 275, 0, 22, 54, 0, 0, 0, 274, 0, 57, 0, 278, 
279, 277, 284, 286, 283, 285, 280, - 281, 282, 287, 0, 0, 0, 75, 0, 0, 0, - 0, 0, 18, 19, 0, 0, 20, 0, 0, 0, - 0, 0, 76, 0, 0, 0, 0, 61, 62, 63, + 281, 282, 287, 87, 89, 0, 75, 0, 0, 0, + 0, 0, 18, 19, 98, 99, 20, 0, 101, 102, + 104, 88, 76, 0, 0, 0, 0, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 0, 0, 0, 13, 0, 0, 0, 24, 0, + 74, 0, 0, 0, 13, 103, 0, 0, 24, 0, 30, 0, 55, 31, 32, 53, 77, 0, 56, 330, 0, 22, 54, 0, 0, 0, 0, 0, 57, 0, 278, 279, 277, 284, 286, 283, 285, 280, 281, 282, @@ -493,51 +493,51 @@ var yyAct = [...]int16{ } var yyPact = [...]int16{ - 32, 106, 569, 569, 405, 526, -1000, -1000, -1000, 105, + 28, 102, 569, 569, 405, 526, -1000, -1000, -1000, 98, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, 277, -1000, 297, -1000, 650, + -1000, -1000, -1000, -1000, -1000, 342, -1000, 204, -1000, 650, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, 22, 95, -1000, -1000, 483, -1000, 483, 101, + -1000, -1000, 21, 93, -1000, -1000, 483, -1000, 483, 97, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, -1000, -1000, 167, -1000, -1000, - 281, -1000, -1000, 309, -1000, 23, -1000, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, 48, 174, 336, 95, -56, -1000, 262, - 262, 324, -1000, 631, 103, -1000, 280, -1000, -1000, 274, - 241, -1000, -1000, -1000, 187, -1000, 180, -1000, 159, 483, - -1000, -57, -40, -1000, 483, 483, 483, 483, 483, 483, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, 307, -1000, -1000, + 338, -1000, -1000, 225, -1000, 23, -1000, -44, -44, -44, + -44, -44, -44, -44, -44, -44, -44, -44, -44, -44, + -44, -44, -44, 47, 171, 259, 93, -57, -1000, 249, + 249, 324, -1000, 631, 75, -1000, 327, -1000, -1000, 222, + 130, -1000, -1000, -1000, 298, -1000, 112, -1000, 159, 483, + -1000, -58, -48, -1000, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, -1000, - 165, -1000, -1000, 94, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, 40, 40, 269, -1000, -1000, -1000, -1000, 155, -1000, - -1000, 41, -1000, 650, -1000, -1000, 31, -1000, 170, -1000, - -1000, -1000, -1000, -1000, 163, -1000, -1000, -1000, -1000, -1000, - 19, 144, 140, -1000, -1000, -1000, 404, 16, 262, 262, - 262, 262, 103, 103, 251, 251, 251, 715, 696, 251, - 251, 715, 103, 103, 251, 103, 16, -1000, 24, -1000, - -1000, -1000, 265, -1000, 189, -1000, -1000, -1000, -1000, -1000, + 39, -1000, -1000, 90, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, 36, 36, 229, -1000, -1000, -1000, -1000, 174, -1000, + -1000, 180, -1000, 650, -1000, -1000, 301, -1000, 105, -1000, + -1000, -1000, -1000, -1000, 44, -1000, -1000, -1000, -1000, -1000, + 18, 157, 83, -1000, -1000, -1000, 404, 15, 249, 249, + 249, 249, 75, 75, 402, 402, 402, 715, 696, 402, + 402, 715, 75, 75, 402, 75, 15, -1000, 19, -1000, + -1000, -1000, 186, -1000, 155, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - 483, -1000, -1000, -1000, -1000, -1000, -1000, 34, 34, 18, - 34, 44, 44, 110, 38, -1000, -1000, 285, 267, 260, - 240, 236, 235, 234, 206, 188, 134, 129, -1000, -1000, - -1000, -1000, -1000, -1000, 102, -1000, -1000, -1000, 14, -1000, - 650, -1000, -1000, -1000, 34, -1000, 12, 
9, 482, -1000, - -1000, -1000, 51, 81, 40, 40, 40, 97, 97, 51, - 97, 51, -73, -1000, -1000, -1000, -1000, -1000, 34, 34, - -1000, -1000, -1000, 34, -1000, -1000, -1000, -1000, -1000, -1000, - 40, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, 104, -1000, 33, -1000, -1000, -1000, -1000, + 483, -1000, -1000, -1000, -1000, -1000, -1000, 31, 31, 17, + 31, 37, 37, 206, 34, -1000, -1000, 197, 191, 188, + 185, 164, 161, 153, 133, 113, 111, 110, -1000, -1000, + -1000, -1000, -1000, -1000, 101, -1000, -1000, -1000, 13, -1000, + 650, -1000, -1000, -1000, 31, -1000, 16, 11, 482, -1000, + -1000, -1000, 33, 163, 163, 163, 36, 40, 40, 33, + 40, 33, -74, -1000, -1000, -1000, -1000, -1000, 31, 31, + -1000, -1000, -1000, 31, -1000, -1000, -1000, -1000, -1000, -1000, + 163, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, 38, -1000, 160, -1000, -1000, -1000, -1000, } var yyPgo = [...]int16{ - 0, 379, 13, 378, 6, 15, 377, 344, 376, 374, - 373, 370, 198, 294, 369, 14, 368, 10, 11, 367, - 366, 8, 364, 3, 4, 363, 2, 1, 0, 362, - 12, 5, 361, 360, 18, 158, 359, 358, 7, 357, - 354, 17, 353, 31, 352, 9, 351, 350, 340, 332, - 327, 326, 314, 321, 302, + 0, 368, 12, 367, 5, 14, 366, 298, 364, 363, + 361, 360, 265, 211, 359, 13, 357, 10, 11, 355, + 353, 7, 352, 8, 4, 351, 2, 1, 3, 350, + 27, 0, 348, 338, 17, 193, 328, 312, 6, 311, + 308, 16, 307, 39, 297, 9, 281, 274, 273, 271, + 234, 218, 299, 163, 161, } var yyR1 = [...]int8{ @@ -630,9 +630,9 @@ var yyChk = [...]int16{ -38, -27, 19, -27, 26, -27, -21, -21, 24, 17, 2, 17, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 2, 22, -4, -27, 26, 26, - 17, -23, -26, 57, -27, -31, -28, -28, -28, -24, + 17, -23, -26, 57, -27, -31, -31, -31, -28, -24, 14, -24, -26, -24, -26, -11, 92, 93, 94, 95, - -27, -27, -27, -25, -28, 24, 21, 2, 21, -28, + -27, -27, -27, -25, -31, 24, 21, 2, 21, -31, } var yyDef = [...]int16{ diff --git a/promql/parser/lex.go b/promql/parser/lex.go index d031e83307..82bf0367b8 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -610,6 +610,9 @@ func lexBuckets(l *Lexer) stateFn { case isSpace(r): l.emit(SPACE) return lexSpace + case r == '-': + l.emit(SUB) + return lexNumber case isDigit(r): l.backup() return lexNumber diff --git a/promql/parser/parse.go b/promql/parser/parse.go index ae558dccc0..05549eaac8 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -784,6 +784,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { } } + if n.Func.Name == "info" && len(n.Args) > 1 { + // Check the type is correct first + if n.Args[1].Type() != ValueTypeVector { + p.addParseErrf(node.PositionRange(), "expected type %s in %s, got %s", DocumentedType(ValueTypeVector), fmt.Sprintf("call to function %q", n.Func.Name), DocumentedType(n.Args[1].Type())) + } + // Check the vector selector in the input doesn't contain a metric name + if n.Args[1].(*VectorSelector).Name != "" { + p.addParseErrf(n.Args[1].PositionRange(), "expected label selectors only, got vector selector instead") + } + // Set Vector Selector flag to bypass empty matcher check + n.Args[1].(*VectorSelector).BypassEmptyMatcherCheck = true + } + for i, arg := range n.Args { if i >= len(n.Func.ArgTypes) { if n.Func.Variadic == 0 { @@ -830,17 +843,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { // metric name is a non-empty matcher. break } - // A Vector selector must contain at least one non-empty matcher to prevent - // implicit selection of all metrics (e.g. by a typo). 
- notEmpty := false - for _, lm := range n.LabelMatchers { - if lm != nil && !lm.Matches("") { - notEmpty = true - break + if !n.BypassEmptyMatcherCheck { + // A Vector selector must contain at least one non-empty matcher to prevent + // implicit selection of all metrics (e.g. by a typo). + notEmpty := false + for _, lm := range n.LabelMatchers { + if lm != nil && !lm.Matches("") { + notEmpty = true + break + } + } + if !notEmpty { + p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } - } - if !notEmpty { - p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } case *NumberLiteral, *StringLiteral: diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 37748323ce..0e5e2f638b 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -3872,6 +3872,81 @@ var testExpr = []struct { }, }, }, + { + input: `info(rate(http_request_counter_total{}[5m]))`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &Call{ + Func: MustGetFunction("rate"), + PosRange: posrange.PositionRange{ + Start: 5, + End: 43, + }, + Args: Expressions{ + &MatrixSelector{ + VectorSelector: &VectorSelector{ + Name: "http_request_counter_total", + OriginalOffset: 0, + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 10, + End: 38, + }, + }, + EndPos: 42, + Range: 5 * time.Minute, + }, + }, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 44, + }, + }, + }, + { + input: `info(rate(http_request_counter_total{}[5m]), target_info{foo="bar"})`, + fail: true, + errMsg: `1:46: parse error: expected label selectors only, got vector selector instead`, + }, + { + input: `info(http_request_counter_total{namespace="zzz"}, {foo="bar", bar="baz"})`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &VectorSelector{ + Name: "http_request_counter_total", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "namespace", "zzz"), + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 48, + }, + }, + &VectorSelector{ + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "foo", "bar"), + MustLabelMatcher(labels.MatchEqual, "bar", "baz"), + }, + PosRange: posrange.PositionRange{ + Start: 50, + End: 72, + }, + BypassEmptyMatcherCheck: true, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 73, + }, + }, + }, } func makeInt64Pointer(val int64) *int64 { @@ -3889,6 +3964,12 @@ func readable(s string) string { } func TestParseExpressions(t *testing.T) { + // Enable experimental functions testing. 
+ EnableExperimentalFunctions = true + t.Cleanup(func() { + EnableExperimentalFunctions = false + }) + model.NameValidationScheme = model.UTF8Validation for _, test := range testExpr { t.Run(readable(test.input), func(t *testing.T) { @@ -3925,8 +4006,7 @@ func TestParseExpressions(t *testing.T) { require.Equal(t, expected, expr, "error on input '%s'", test.input) } else { - require.Error(t, err) - require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) + require.ErrorContains(t, err, test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) var errorList ParseErrors ok := errors.As(err, &errorList) @@ -4084,17 +4164,17 @@ func TestParseHistogramSeries(t *testing.T) { }, { name: "all properties used", - input: `{} {{schema:1 sum:-0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5 counter_reset_hint:gauge}}`, + input: `{} {{schema:1 sum:0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:3 n_buckets:[4.1 5] n_offset:5 counter_reset_hint:gauge}}`, expected: []histogram.FloatHistogram{{ Schema: 1, - Sum: -0.3, + Sum: 0.3, Count: 3.1, ZeroCount: 7.1, ZeroThreshold: 0.05, PositiveBuckets: []float64{5.1, 10, 7}, - PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + PositiveSpans: []histogram.Span{{Offset: 3, Length: 3}}, NegativeBuckets: []float64{4.1, 5}, - NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + NegativeSpans: []histogram.Span{{Offset: 5, Length: 2}}, CounterResetHint: histogram.GaugeType, }}, }, @@ -4114,6 +4194,22 @@ func TestParseHistogramSeries(t *testing.T) { CounterResetHint: histogram.GaugeType, }}, }, + { + name: "all properties used, with negative values where supported", + input: `{} {{schema:1 sum:-0.3 count:-3.1 z_bucket:-7.1 z_bucket_w:0.05 buckets:[-5.1 -10 -7] offset:-3 n_buckets:[-4.1 -5] n_offset:-5 counter_reset_hint:gauge}}`, + expected: []histogram.FloatHistogram{{ + Schema: 1, + Sum: -0.3, + Count: -3.1, + ZeroCount: -7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{-5.1, -10, -7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{-4.1, -5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.GaugeType, + }}, + }, { name: "static series", input: `{} {{buckets:[5 10 7] schema:1}}x2`, @@ -4385,6 +4481,22 @@ func TestHistogramTestExpression(t *testing.T) { }, expected: `{{offset:-3 buckets:[5.1 0 0 0 0 10 7] n_offset:-1 n_buckets:[4.1 5 0 0 7 8 9]}}`, }, + { + name: "known counter reset hint", + input: histogram.FloatHistogram{ + Schema: 1, + Sum: -0.3, + Count: 3.1, + ZeroCount: 7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{5.1, 10, 7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{4.1, 5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.CounterReset, + }, + expected: `{{schema:1 count:3.1 sum:-0.3 z_bucket:7.1 z_bucket_w:0.05 counter_reset_hint:reset offset:-3 buckets:[5.1 10 7] n_offset:-5 n_buckets:[4.1 5]}}`, + }, } { t.Run(test.name, func(t *testing.T) { expression := test.input.TestExpression() @@ -4436,7 +4548,7 @@ func TestRecoverParserError(t *testing.T) { e := errors.New("custom error") defer func() { - require.Equal(t, e.Error(), err.Error()) + require.EqualError(t, err, e.Error()) }() defer p.recover(&err) diff --git a/promql/promqltest/test.go 
b/promql/promqltest/test.go index ff709e4426..e078bcb60b 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -39,6 +39,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/almost" + "github.com/prometheus/prometheus/util/convertnhcb" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -46,8 +47,8 @@ import ( var ( patSpace = regexp.MustCompile("[\t ]+") patLoad = regexp.MustCompile(`^load(?:_(with_nhcb))?\s+(.+?)$`) - patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) - patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) + patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered|info))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) + patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn|info))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) ) const ( @@ -321,6 +322,8 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) { cmd.fail = true case "warn": cmd.warn = true + case "info": + cmd.info = true } for j := 1; i+1 < len(lines); j++ { @@ -477,43 +480,22 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) { - mName := m.Get(labels.MetricName) - baseM := labels.NewBuilder(m). - Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). - Del(labels.BucketLabel). - Labels() - hash := baseM.Hash() - return baseM, hash -} - type tempHistogramWrapper struct { metric labels.Labels upperBounds []float64 - histogramByTs map[int64]tempHistogram + histogramByTs map[int64]convertnhcb.TempHistogram } func newTempHistogramWrapper() tempHistogramWrapper { return tempHistogramWrapper{ upperBounds: []float64{}, - histogramByTs: map[int64]tempHistogram{}, + histogramByTs: map[int64]convertnhcb.TempHistogram{}, } } -type tempHistogram struct { - bucketCounts map[float64]float64 - count float64 - sum float64 -} - -func newTempHistogram() tempHistogram { - return tempHistogram{ - bucketCounts: map[float64]float64{}, - } -} - -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) { - m2, m2hash := getHistogramMetricBase(m, suffix) +func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) { + m2 := convertnhcb.GetHistogramMetricBase(m, suffix) + m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] if !exists { histogramWrapper = newTempHistogramWrapper() @@ -528,7 +510,7 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap } histogram, exists := histogramWrapper.histogramByTs[s.T] if !exists { - histogram = newTempHistogram() + histogram = convertnhcb.NewTempHistogram() } updateHistogram(&histogram, s.F) histogramWrapper.histogramByTs[s.T] = histogram @@ -536,34 +518,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap histogramMap[m2hash] = histogramWrapper } -func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, 
*histogram.FloatHistogram) { - sort.Float64s(upperBounds0) - upperBounds := make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { // deduplicate - upperBounds = append(upperBounds, le) - prevLE = le - } - } - var customBounds []float64 - if upperBounds[len(upperBounds)-1] == math.Inf(1) { - customBounds = upperBounds[:len(upperBounds)-1] - } else { - customBounds = upperBounds - } - return upperBounds, &histogram.FloatHistogram{ - Count: 0, - Sum: 0, - Schema: histogram.CustomBucketsSchema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: uint32(len(upperBounds))}, - }, - PositiveBuckets: make([]float64, len(upperBounds)), - CustomValues: customBounds, - } -} - // If classic histograms are defined, convert them into native histograms with custom // bounds and append the defined time series to the storage. func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { @@ -582,16 +536,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { } processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) - }, func(histogram *tempHistogram, f float64) { - histogram.bucketCounts[le] = f + }, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.BucketCounts[le] = f }) case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.count = f + processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Count = f }) case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.sum = f + processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Sum = f }) } } @@ -599,30 +553,21 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. 
for _, histogramWrapper := range histogramMap { - upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true) + fhBase := hBase.ToFloat(nil) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - fh := fhBase.Copy() - var prevCount, total float64 - for i, le := range upperBounds { - currCount, exists := histogram.bucketCounts[le] - if !exists { - currCount = 0 + h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase) + if fh == nil { + if err := h.Validate(); err != nil { + return err } - count := currCount - prevCount - fh.PositiveBuckets[i] = count - total += count - prevCount = currCount + fh = h.ToFloat(nil) } - fh.Sum = histogram.sum - if histogram.count != 0 { - total = histogram.count - } - fh.Count = total - s := promql.Sample{T: t, H: fh.Compact(0)} - if err := s.H.Validate(); err != nil { + if err := fh.Validate(); err != nil { return err } + s := promql.Sample{T: t, H: fh} samples = append(samples, s) } sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T }) @@ -657,10 +602,10 @@ type evalCmd struct { step time.Duration line int - isRange bool // if false, instant query - fail, warn, ordered bool - expectedFailMessage string - expectedFailRegexp *regexp.Regexp + isRange bool // if false, instant query + fail, warn, ordered, info bool + expectedFailMessage string + expectedFailRegexp *regexp.Regexp metrics map[uint64]labels.Labels expectScalar bool @@ -790,7 +735,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) { - return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s)) + return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H.TestExpression(), actual.H.TestExpression(), formatSeriesResult(s)) } } } @@ -1006,7 +951,13 @@ func formatSeriesResult(s promql.Series) string { histogramPlural = "" } - return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, s.Histograms) + histograms := make([]string, 0, len(s.Histograms)) + + for _, p := range s.Histograms { + histograms = append(histograms, fmt.Sprintf("%v @[%v]", p.H.TestExpression(), p.T)) + } + + return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, histograms) } // HistogramTestExpression returns TestExpression() for the given histogram or "" if the histogram is nil. 
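For readers following the refactor above: the temp-histogram plumbing deleted from this file now lives in util/convertnhcb. As a rough standalone sketch (plain float64 slices, and dedupeUpperBounds is a hypothetical name, not the package's API), the bound preprocessing that moved is: sort the classic upper bounds, deduplicate them, and keep a trailing +Inf out of the custom values, since the custom-buckets schema treats the last bucket as unbounded:

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// dedupeUpperBounds mirrors the preprocessing of the removed helper: sort the
// classic bucket upper bounds, drop duplicates, and exclude a trailing +Inf
// from the custom values (the +Inf bucket is implicit in the schema).
func dedupeUpperBounds(in []float64) (upperBounds, customValues []float64) {
	sort.Float64s(in)
	prev := math.Inf(-1)
	for _, le := range in {
		if le != prev { // deduplicate
			upperBounds = append(upperBounds, le)
			prev = le
		}
	}
	customValues = upperBounds
	if len(upperBounds) > 0 && upperBounds[len(upperBounds)-1] == math.Inf(1) {
		customValues = upperBounds[:len(upperBounds)-1]
	}
	return upperBounds, customValues
}

func main() {
	ub, cv := dedupeUpperBounds([]float64{0.2, 0.1, 0.2, math.Inf(1)})
	fmt.Println(ub, cv) // [0.1 0.2 +Inf] [0.1 0.2]
}
```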
@@ -1202,13 +1153,16 @@ func (t *test) runInstantQuery(iq atModifierTestCase, cmd *evalCmd, engine promq if res.Err == nil && cmd.fail { return fmt.Errorf("expected error evaluating query %q (line %d) but got none", iq.expr, cmd.line) } - countWarnings, _ := res.Warnings.CountWarningsAndInfo() + countWarnings, countInfo := res.Warnings.CountWarningsAndInfo() if !cmd.warn && countWarnings > 0 { return fmt.Errorf("unexpected warnings evaluating query %q (line %d): %v", iq.expr, cmd.line, res.Warnings) } if cmd.warn && countWarnings == 0 { return fmt.Errorf("expected warnings evaluating query %q (line %d) but got none", iq.expr, cmd.line) } + if cmd.info && countInfo == 0 { + return fmt.Errorf("expected info annotations evaluating query %q (line %d) but got none", iq.expr, cmd.line) + } err = cmd.compareResult(res.Value) if err != nil { return fmt.Errorf("error in %s %s (line %d): %w", cmd, iq.expr, cmd.line, err) diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index bc0027a686..5da924e9a5 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -381,7 +381,7 @@ load 5m eval range from 0 to 10m step 5m testmetric testmetric {{schema:-1 sum:4 count:1 buckets:[1] offset:1}} {{schema:-1 sum:7 count:1 buckets:[1] offset:1}} {{schema:-1 sum:8 count:1 buckets:[1] offset:1}} `, - expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {count:1, sum:7, (1,4]:1}, but got {count:1, sum:5, (1,4]:1} (result has 0 float points [] and 3 histogram points [{count:1, sum:4, (1,4]:1} @[0] {count:1, sum:5, (1,4]:1} @[300000] {count:1, sum:6, (1,4]:1} @[600000]])`, + expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {{schema:-1 count:1 sum:7 offset:1 buckets:[1]}}, but got {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} (result has 0 float points [] and 3 histogram points [{{schema:-1 count:1 sum:4 offset:1 buckets:[1]}} @[0] {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[300000] {{schema:-1 count:1 sum:6 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[600000]])`, }, "range query with too many points for query time range": { input: testData + ` @@ -532,7 +532,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric 2 3 `, - expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{count:0, sum:0} @[0] {count:0, sum:0} @[300000]]`, + expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{{}} @[0] {{counter_reset_hint:not_reset}} @[300000]]`, }, "range query with expected mixed results": { input: ` @@ -552,7 +552,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric {{}} 3 `, - expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{count:0, sum:0} @[300000]])`, + expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{{}} @[300000]])`, }, "instant query 
with expected scalar result": {
		input: `
diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test
index 3c91883960..e2eb381dbc 100644
--- a/promql/promqltest/testdata/aggregators.test
+++ b/promql/promqltest/testdata/aggregators.test
@@ -572,3 +572,160 @@ clear
#
#eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()))
#	{} 1
+
+clear
+
+# Test that stddev produces consistent results regardless of the order the data is loaded in.
+load 5m
+  series{label="a"} 1
+  series{label="b"} 2
+  series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}}
+
+eval instant at 0m stddev(series)
+  {} 0.5
+
+eval instant at 0m stdvar(series)
+  {} 0.25
+
+eval instant at 0m stddev by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+
+eval instant at 0m stdvar by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+
+clear
+
+load 5m
+  series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}}
+  series{label="b"} 1
+  series{label="c"} 2
+
+eval instant at 0m stddev(series)
+  {} 0.5
+
+eval instant at 0m stdvar(series)
+  {} 0.25
+
+eval instant at 0m stddev by (label) (series)
+  {label="b"} 0
+  {label="c"} 0
+
+eval instant at 0m stdvar by (label) (series)
+  {label="b"} 0
+  {label="c"} 0
+
+clear
+
+load 5m
+  series{label="a"} 1
+  series{label="b"} 2
+  series{label="c"} NaN
+
+eval instant at 0m stddev(series)
+  {} NaN
+
+eval instant at 0m stdvar(series)
+  {} NaN
+
+eval instant at 0m stddev by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+  {label="c"} NaN
+
+eval instant at 0m stdvar by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+  {label="c"} NaN
+
+clear
+
+load 5m
+  series{label="a"} NaN
+  series{label="b"} 1
+  series{label="c"} 2
+
+eval instant at 0m stddev(series)
+  {} NaN
+
+eval instant at 0m stdvar(series)
+  {} NaN
+
+eval instant at 0m stddev by (label) (series)
+  {label="a"} NaN
+  {label="b"} 0
+  {label="c"} 0
+
+eval instant at 0m stdvar by (label) (series)
+  {label="a"} NaN
+  {label="b"} 0
+  {label="c"} 0
+
+clear
+
+load 5m
+  series NaN
+
+eval instant at 0m stddev(series)
+  {} NaN
+
+eval instant at 0m stdvar(series)
+  {} NaN
+
+clear
+
+load 5m
+  series{label="a"} 1
+  series{label="b"} 2
+  series{label="c"} inf
+
+eval instant at 0m stddev (series)
+  {} NaN
+
+eval instant at 0m stdvar (series)
+  {} NaN
+
+eval instant at 0m stddev by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+  {label="c"} NaN
+
+eval instant at 0m stdvar by (label) (series)
+  {label="a"} 0
+  {label="b"} 0
+  {label="c"} NaN
+
+clear
+
+load 5m
+  series{label="a"} inf
+  series{label="b"} 1
+  series{label="c"} 2
+
+eval instant at 0m stddev(series)
+  {} NaN
+
+eval instant at 0m stdvar(series)
+  {} NaN
+
+eval instant at 0m stddev by (label) (series)
+  {label="a"} NaN
+  {label="b"} 0
+  {label="c"} 0
+
+eval instant at 0m stdvar by (label) (series)
+  {label="a"} NaN
+  {label="b"} 0
+  {label="c"} 0
+
+clear
+
+load 5m
+  series inf
+
+eval instant at 0m stddev(series)
+  {} NaN
+
+eval instant at 0m stdvar(series)
+  {} NaN
diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test
index 4b025448a5..fb1d169624 100644
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@@ -651,7 +651,7 @@ eval_ordered instant at 50m sort_by_label(node_uname_info, "release")
  node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100
  node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100
instance="4m1000", release="1.111.3"} 100 -# Tests for holt_winters +# Tests for double_exponential_smoothing clear # positive trends @@ -661,7 +661,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000 http_requests{job="api-server", instance="1", group="canary"} 0+40x2000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 8000 {job="api-server", instance="1", group="production"} 16000 {job="api-server", instance="0", group="canary"} 24000 @@ -675,7 +675,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300-80x1000 http_requests{job="api-server", instance="1", group="canary"} 0-40x1000 0+40x1000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 0 {job="api-server", instance="1", group="production"} -16000 {job="api-server", instance="0", group="canary"} 24000 @@ -1258,3 +1258,12 @@ load 1m # We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. eval range from 0 to 61s step 1s timestamp(metric) {} 0x59 60 60 + +clear + +# Check round with mixed data types +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +eval range from 0 to 5m step 1m round(mixed_metric) + {} _ 1 2 3 diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test index 68232a815d..6089fd01d2 100644 --- a/promql/promqltest/testdata/histograms.test +++ b/promql/promqltest/testdata/histograms.test @@ -421,6 +421,25 @@ eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket eval instant at 50m sum(request_duration_seconds) {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} +eval instant at 50m sum(request_duration_seconds{job="job1",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job1",instance="ins2"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins2"}) + {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} + +eval instant at 50m avg(request_duration_seconds) + {} {{schema:-53 count:62.5 custom_values:[0.1 0.2] buckets:[25 22.5 15]}} + +# To verify the result above, calculate from classic histogram as well. +eval instant at 50m avg (request_duration_seconds_bucket{le="0.1"}) + {} 25 + +eval instant at 50m avg (request_duration_seconds_bucket{le="0.2"}) - avg (request_duration_seconds_bucket{le="0.1"}) + {} 22.5 + +eval instant at 50m avg (request_duration_seconds_bucket{le="+Inf"}) - avg (request_duration_seconds_bucket{le="0.2"}) + {} 15 + +eval instant at 50m count(request_duration_seconds) + {} 4 + # A histogram with nonmonotonic bucket counts. This may happen when recording # rule evaluation or federation races scrape ingestion, causing some buckets # counts to be derived from fewer samples. 
@@ -504,3 +523,36 @@ eval instant at 5m histogram_quantile(1.0, sum by (le) (rate(const_histogram_buc eval instant at 5m histogram_quantile(1.0, sum(rate(const_histogram[5m]))) {} NaN + +load_with_nhcb 1m + histogram_over_time_bucket{le="0"} 0 1 3 9 + histogram_over_time_bucket{le="1"} 2 3 3 9 + histogram_over_time_bucket{le="2"} 3 8 5 10 + histogram_over_time_bucket{le="4"} 3 10 6 18 + +# Test custom buckets with sum_over_time, avg_over_time. +eval instant at 3m sum_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:37 custom_values:[0 1 2 4] buckets:[13 4 9 11]}} + +eval instant at 3m avg_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:9.25 custom_values:[0 1 2 4] buckets:[3.25 1 2.25 2.75]}} + +# Test custom buckets with counter reset +load_with_nhcb 5m + histogram_with_reset_bucket{le="1"} 1 3 9 + histogram_with_reset_bucket{le="2"} 3 3 9 + histogram_with_reset_bucket{le="4"} 8 5 12 + histogram_with_reset_bucket{le="8"} 10 6 18 + histogram_with_reset_sum{} 36 16 61 + +eval instant at 10m increase(histogram_with_reset[15m]) + {} {{schema:-53 count:27 sum:91.5 custom_values:[1 2 4 8] counter_reset_hint:gauge buckets:[13.5 0 4.5 9]}} + +eval instant at 10m resets(histogram_with_reset[15m]) + {} 1 + +eval instant at 10m histogram_count(increase(histogram_with_reset[15m])) + {} 27 + +eval instant at 10m histogram_sum(increase(histogram_with_reset[15m])) + {} 91.5 diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 549781e8c5..8c5814ae8a 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -46,9 +46,12 @@ eval instant at 1m histogram_fraction(1, 2, single_histogram) eval instant at 1m histogram_fraction(0, 8, single_histogram) {} 1 -# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. +# Median is 1.414213562373095 (2**2**-1, or sqrt(2)) due to +# exponential interpolation, i.e. the "midpoint" within range 1 < x <= +# 2 is assumed where the bucket boundary would be if we increased the +# resolution of the histogram by one step. eval instant at 1m histogram_quantile(0.5, single_histogram) - {} 1.5 + {} 1.414213562373095 clear @@ -68,8 +71,9 @@ eval instant at 5m histogram_avg(multi_histogram) eval instant at 5m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, multi_histogram) - {} 1.5 + {} 1.414213562373095 # Each entry should look the same as the first. @@ -85,8 +89,9 @@ eval instant at 50m histogram_avg(multi_histogram) eval instant at 50m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, multi_histogram) - {} 1.5 + {} 1.414213562373095 clear @@ -109,8 +114,9 @@ eval instant at 5m histogram_avg(incr_histogram) eval instant at 5m histogram_fraction(1, 2, incr_histogram) {} 0.6 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 eval instant at 50m incr_histogram @@ -129,16 +135,18 @@ eval instant at 50m histogram_avg(incr_histogram) eval instant at 50m histogram_fraction(1, 2, incr_histogram) {} 0.8571428571428571 +# See explanation for exponential interpolation above. 
eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. eval instant at 50m rate(incr_histogram[10m]) {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} # Calculate the 50th percentile of observations over the last 10m. +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.5 + {} 1.414213562373095 clear @@ -211,8 +219,9 @@ eval instant at 1m histogram_avg(negative_histogram) eval instant at 1m histogram_fraction(-2, -1, negative_histogram) {} 0.5 +# Exponential interpolation works the same as for positive buckets, just mirrored. eval instant at 1m histogram_quantile(0.5, negative_histogram) - {} -1.5 + {} -1.414213562373095 clear @@ -233,8 +242,9 @@ eval instant at 5m histogram_avg(two_samples_histogram) eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, two_samples_histogram) - {} -1.5 + {} -1.414213562373095 clear @@ -392,20 +402,24 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_1) eval instant at 10m histogram_quantile(1, histogram_quantile_1) {} 16 +# The following quantiles are within a bucket. Exponential +# interpolation is applied (rather than linear, as it is done for +# classic histograms), leading to slightly different quantile values. eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) - {} 15.759999999999998 + {} 15.67072476139083 eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) - {} 13.600000000000001 + {} 12.99603834169977 eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) - {} 4.799999999999997 + {} 4.594793419988138 eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) - {} 1.6666666666666665 + {} 1.5874010519681994 +# Linear interpolation within the zero bucket after all. eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) - {} 0.0006000000000000001 + {} 0.0006 eval instant at 10m histogram_quantile(0, histogram_quantile_1) {} 0 @@ -425,17 +439,20 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_2) eval instant at 10m histogram_quantile(1, histogram_quantile_2) {} 0 +# Again, the quantile values here are slightly different from what +# they would be with linear interpolation. Note that quantiles +# ending up in the zero bucket are linearly interpolated after all. eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) - {} -6.000000000000048e-05 + {} -0.00006 eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) - {} -0.0005999999999999996 + {} -0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) - {} -1.6666666666666667 + {} -1.5874010519681996 eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) - {} -13.6 + {} -12.996038341699768 eval instant at 10m histogram_quantile(0, histogram_quantile_2) {} -16 @@ -445,7 +462,9 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_2) clear -# Apply quantile function to histogram with both positive and negative buckets with zero bucket. +# Apply quantile function to histogram with both positive and negative +# buckets with zero bucket. +# First positive buckets with exponential interpolation. 
load 10m histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -456,31 +475,34 @@ eval instant at 10m histogram_quantile(1, histogram_quantile_3) {} 16 eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) - {} 15.519999999999996 + {} 15.34822590920423 eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) - {} 11.200000000000003 + {} 10.556063286183155 eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) - {} 1.2666666666666657 + {} 1.2030250360821164 +# Linear interpolation in the zero bucket, symmetrically centered around +# the zero point. eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) - {} 0.0006000000000000005 + {} 0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) {} 0 eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) - {} -0.0005999999999999996 + {} -0.0006 +# Finally negative buckets with mirrored exponential interpolation. eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) - {} -1.266666666666667 + {} -1.2030250360821169 eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) - {} -11.2 + {} -10.556063286183155 eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) - {} -15.52 + {} -15.34822590920423 eval instant at 10m histogram_quantile(0, histogram_quantile_3) {} -16 @@ -490,6 +512,90 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_3) clear +# Try different schemas. (The interpolation logic must not depend on the schema.) +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 2.0 + {schema="0"} 1.4142135623730951 + {schema="+1"} 1.189207 + +eval instant at 1m histogram_fraction(0, 2, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.4142135623730951, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.189207, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# The same as above, but one bucket "further to the right". +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 8.0 + {schema="0"} 2.82842712474619 + {schema="+1"} 1.6817928305074292 + +eval instant at 1m histogram_fraction(0, 8, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 2.82842712474619, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.6817928305074292, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# And everything again but for negative buckets. 
+clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -2.0 + {schema="0"} -1.4142135623730951 + {schema="+1"} -1.189207 + +eval instant at 1m histogram_fraction(-2, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-1.4142135623730951, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.189207, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -8.0 + {schema="0"} -2.82842712474619 + {schema="+1"} -1.6817928305074292 + +eval instant at 1m histogram_fraction(-8, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-2.82842712474619, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.6817928305074292, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + + # Apply fraction function to empty histogram. load 10m histogram_fraction_1 {{}}x1 @@ -515,11 +621,18 @@ eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2) eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2) {} 0.16666666666666666 +# Note that this result and the one above add up to 1. +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) + {} 0.8333333333333334 + +# We are in the zero bucket, resulting in linear interpolation eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2) {} 0.08333333333333333 -eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) - {} 0.8333333333333334 +# Demonstrate that the inverse operation with histogram_quantile yields +# the original value with the non-trivial result above. +eval instant at 10m histogram_quantile(0.08333333333333333, histogram_fraction_2) + {} 0.0005 eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) {} 0 @@ -527,17 +640,30 @@ eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2) {} 0.25 +# More non-trivial results with interpolation involved below, including +# some round-trips via histogram_quantile to prove that the inverse +# operation leads to the same results. 
+ +eval instant at 10m histogram_fraction(0, 1.5, histogram_fraction_2) + {} 0.4795739585136224 + eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2) - {} 0.125 + {} 0.10375937481971091 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2) {} 0.3333333333333333 +eval instant at 10m histogram_fraction(0, 6, histogram_fraction_2) + {} 0.6320802083934297 + +eval instant at 10m histogram_quantile(0.6320802083934297, histogram_fraction_2) + {} 6 + eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2) - {} 0.2916666666666667 + {} 0.29874687506009634 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2) - {} 0.16666666666666666 + {} 0.15250624987980724 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2) {} 0 @@ -600,6 +726,12 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3) eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3) {} 0.08333333333333333 +eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_3) + {} 0.9166666666666666 + +eval instant at 10m histogram_quantile(0.9166666666666666, histogram_fraction_3) + {} -0.0005 + eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3) {} 0 @@ -625,16 +757,22 @@ eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3) {} 0.25 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3) - {} 0.125 + {} 0.10375937481971091 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3) {} 0.3333333333333333 +eval instant at 10m histogram_fraction(-inf, -6, histogram_fraction_3) + {} 0.36791979160657035 + +eval instant at 10m histogram_quantile(0.36791979160657035, histogram_fraction_3) + {} -6 + eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3) - {} 0.2916666666666667 + {} 0.29874687506009634 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3) - {} 0.16666666666666666 + {} 0.15250624987980724 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3) {} 0 @@ -684,6 +822,18 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) {} 0.08333333333333333 +eval instant at 10m histogram_fraction(-inf, 0.0005, histogram_fraction_4) + {} 0.5416666666666666 + +eval instant at 10m histogram_quantile(0.5416666666666666, histogram_fraction_4) + {} 0.0005 + +eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_4) + {} 0.4583333333333333 + +eval instant at 10m histogram_quantile(0.4583333333333333, histogram_fraction_4) + {} -0.0005 + eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) {} 0.4166666666666667 @@ -694,31 +844,31 @@ eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855414 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) {} 0.16666666666666666 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004825 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990366 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855456 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) {} 0.16666666666666666 
eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004817 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990362 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) {} 0 @@ -752,27 +902,39 @@ eval instant at 10m histogram_sum(scalar(histogram_fraction(-Inf, +Inf, sum(hist # Apply multiplication and division operator to histogram. load 10m - histogram_mul_div {{schema:0 count:21 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[3 3 3]}}x1 + histogram_mul_div {{schema:0 count:30 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[6 6 6]}}x1 float_series_3 3+0x1 float_series_0 0+0x1 eval instant at 10m histogram_mul_div*3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} + +eval instant at 10m histogram_mul_div*-1 + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} + +eval instant at 10m -histogram_mul_div + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} + +eval instant at 10m histogram_mul_div*-3 + {} {{schema:0 count:-90 sum:-99 z_bucket:-9 z_bucket_w:0.001 buckets:[-9 -9 -9] n_buckets:[-18 -18 -18]}} eval instant at 10m 3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div*float_series_3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m float_series_3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div/3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} + +eval instant at 10m histogram_mul_div/-3 + {} {{schema:0 count:-10 sum:-11 z_bucket:-1 z_bucket_w:0.001 buckets:[-1 -1 -1] n_buckets:[-2 -2 -2]}} eval instant at 10m histogram_mul_div/float_series_3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} eval instant at 10m histogram_mul_div*0 {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} @@ -786,18 +948,40 @@ eval instant at 10m histogram_mul_div*float_series_0 eval instant at 10m float_series_0*histogram_mul_div {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} -# TODO: (NeerajGartia21) remove all the histogram buckets in case of division with zero. 
See: https://github.com/prometheus/prometheus/issues/13934 eval instant at 10m histogram_mul_div/0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div/float_series_0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div*0/0 - {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}} + {} {{schema:0 count:NaN sum:NaN z_bucket_w:0.001 z_bucket:NaN}} + +eval_info instant at 10m histogram_mul_div*histogram_mul_div + +eval_info instant at 10m histogram_mul_div/histogram_mul_div + +eval_info instant at 10m float_series_3/histogram_mul_div + +eval_info instant at 10m 0/histogram_mul_div clear +# Apply binary operators to mixed histogram and float samples. +# TODO:(NeerajGartia21) move these tests to their respective locations when the tests from engine_test.go are moved here. + +load 10m + histogram_sample {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + float_sample 0x1 + +eval_info instant at 10m float_sample+histogram_sample + +eval_info instant at 10m histogram_sample+float_sample + +eval_info instant at 10m float_sample-histogram_sample + +eval_info instant at 10m histogram_sample-float_sample + # Counter reset only noticeable in a single bucket. load 5m reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}} diff --git a/promql/quantile.go b/promql/quantile.go index 7ddb76acba..06775d3ae6 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -153,19 +153,31 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) { // histogramQuantile calculates the quantile 'q' based on the given histogram. // -// The quantile value is interpolated assuming a linear distribution within a -// bucket. -// TODO(beorn7): Find an interpolation method that is a better fit for -// exponential buckets (and think about configurable interpolation). +// For custom buckets, the result is interpolated linearly, i.e. it is assumed +// the observations are uniformly distributed within each bucket. (This is +// quite a blunt assumption, but it is consistent with the interpolation method +// used for classic histograms so far.) +// +// For exponential buckets, the interpolation is done under the assumption that +// the samples within each bucket are distributed in a way that they would +// uniformly populate the buckets in a hypothetical histogram with higher +// resolution. For example, if the rank calculation suggests that the requested +// quantile is right in the middle of the population of the (1,2] bucket, we +// assume the quantile would be right at the bucket boundary between the two +// buckets the (1,2] bucket would be divided into if the histogram had double +// the resolution, which is 2**2**-1 = 1.4142... We call this exponential +// interpolation. +// +// However, for a quantile that ends up in the zero bucket, this method isn't +// very helpful (because there is an infinite number of buckets close to zero, +// so we would have to assume zero as the result). Therefore, we return to +// linear interpolation in the zero bucket. 
// // A natural lower bound of 0 is assumed if the histogram has only positive // buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has // only negative buckets. -// TODO(beorn7): Come to terms if we want that. // -// There are a number of special cases (once we have a way to report errors -// happening during evaluations of AST functions, we should report those -// explicitly): +// There are a number of special cases: // // If the histogram has 0 observations, NaN is returned. // @@ -193,9 +205,9 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank float64 ) - // if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator - // if the q < 0.5, use the forward iterator - // if the q >= 0.5, use the reverse iterator + // If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator. + // If q < 0.5, use the forward iterator. + // If q >= 0.5, use the reverse iterator. if math.IsNaN(h.Sum) || q < 0.5 { it = h.AllBucketIterator() rank = q * h.Count @@ -260,8 +272,29 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank = count - rank } - // TODO(codesome): Use a better estimation than linear. - return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count) + // The fraction of how far we are into the current bucket. + fraction := rank / bucket.Count + + // Return linear interpolation for custom buckets and for quantiles that + // end up in the zero bucket. + if h.UsesCustomBuckets() || (bucket.Lower <= 0 && bucket.Upper >= 0) { + return bucket.Lower + (bucket.Upper-bucket.Lower)*fraction + } + + // For exponential buckets, we interpolate on a logarithmic scale. On a + // logarithmic scale, the exponential bucket boundaries (for any schema) + // become linear (every bucket has the same width). Therefore, after + // taking the logarithm of both bucket boundaries, we can use the + // calculated fraction in the same way as for linear interpolation (see + // above). Finally, we return to the normal scale by applying the + // exponential function to the result. + logLower := math.Log2(math.Abs(bucket.Lower)) + logUpper := math.Log2(math.Abs(bucket.Upper)) + if bucket.Lower > 0 { // Positive bucket. + return math.Exp2(logLower + (logUpper-logLower)*fraction) + } + // Otherwise, we are in a negative bucket and have to mirror things. + return -math.Exp2(logUpper + (logLower-logUpper)*(1-fraction)) } // histogramFraction calculates the fraction of observations between the @@ -271,8 +304,8 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { // histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h) // returns 0.9. // -// The same notes (and TODOs) with regard to interpolation and assumptions about -// the zero bucket boundaries apply as for histogramQuantile. +// The same notes with regard to interpolation and assumptions about the zero +// bucket boundaries apply as for histogramQuantile. // // Whether either boundary is inclusive or exclusive doesn’t actually matter as // long as interpolation has to be performed anyway. In the case of a boundary @@ -310,7 +343,35 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 ) for it.Next() { b := it.At() - if b.Lower < 0 && b.Upper > 0 { + zeroBucket := false + + // interpolateLinearly is used for custom buckets to be + // consistent with the linear interpolation known from classic + // histograms. It is also used for the zero bucket. 
+ interpolateLinearly := func(v float64) float64 { + return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower) + } + + // interpolateExponentially uses the same exponential + // interpolation method as histogramQuantile above. This + // method is a better fit for exponential bucketing. + interpolateExponentially := func(v float64) float64 { + var ( + logLower = math.Log2(math.Abs(b.Lower)) + logUpper = math.Log2(math.Abs(b.Upper)) + logV = math.Log2(math.Abs(v)) + fraction float64 + ) + if v > 0 { + fraction = (logV - logLower) / (logUpper - logLower) + } else { + fraction = 1 - ((logV - logUpper) / (logLower - logUpper)) + } + return rank + b.Count*fraction + } + + if b.Lower <= 0 && b.Upper >= 0 { + zeroBucket = true switch { case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0: // This is the zero bucket and the histogram has only @@ -325,10 +386,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 } } if !lowerSet && b.Lower >= lower { + // We have hit the lower value at the lower bucket boundary. lowerRank = rank lowerSet = true } if !upperSet && b.Lower >= upper { + // We have hit the upper value at the lower bucket boundary. upperRank = rank upperSet = true } @@ -336,11 +399,21 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 break } if !lowerSet && b.Lower < lower && b.Upper > lower { - lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower) + // The lower value is in this bucket. + if h.UsesCustomBuckets() || zeroBucket { + lowerRank = interpolateLinearly(lower) + } else { + lowerRank = interpolateExponentially(lower) + } lowerSet = true } if !upperSet && b.Lower < upper && b.Upper > upper { - upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower) + // The upper value is in this bucket. 
+ if h.UsesCustomBuckets() || zeroBucket { + upperRank = interpolateLinearly(upper) + } else { + upperRank = interpolateExponentially(upper) + } upperSet = true } if lowerSet && upperSet { diff --git a/promql/query_logger.go b/promql/query_logger.go index 7e06ebb97f..c0a70b66d7 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,14 +27,12 @@ import ( "unicode/utf8" "github.com/edsrzf/mmap-go" - "github.com/go-kit/log" - "github.com/go-kit/log/level" ) type ActiveQueryTracker struct { - mmapedFile []byte + mmappedFile []byte getNextIndex chan int - logger log.Logger + logger *slog.Logger closer io.Closer maxConcurrent int } @@ -63,11 +62,11 @@ func parseBrokenJSON(brokenJSON []byte) (string, bool) { return queries, true } -func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { +func logUnfinishedQueries(filename string, filesize int, logger *slog.Logger) { if _, err := os.Stat(filename); err == nil { fd, err := os.Open(filename) if err != nil { - level.Error(logger).Log("msg", "Failed to open query log file", "err", err) + logger.Error("Failed to open query log file", "err", err) return } defer fd.Close() @@ -75,7 +74,7 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { brokenJSON := make([]byte, filesize) _, err = fd.Read(brokenJSON) if err != nil { - level.Error(logger).Log("msg", "Failed to read query log file", "err", err) + logger.Error("Failed to read query log file", "err", err) return } @@ -83,72 +82,72 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { if !queriesExist { return } - level.Info(logger).Log("msg", "These queries didn't finish in prometheus' last run:", "queries", queries) + logger.Info("These queries didn't finish in prometheus' last run:", "queries", queries) } } -type mmapedFile struct { +type mmappedFile struct { f io.Closer m mmap.MMap } -func (f *mmapedFile) Close() error { +func (f *mmappedFile) Close() error { err := f.m.Unmap() if err != nil { - err = fmt.Errorf("mmapedFile: unmapping: %w", err) + err = fmt.Errorf("mmappedFile: unmapping: %w", err) } if fErr := f.f.Close(); fErr != nil { - return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) + return errors.Join(fmt.Errorf("close mmappedFile.f: %w", fErr), err) } return err } -func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { +func getMMappedFile(filename string, filesize int, logger *slog.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { absPath, pathErr := filepath.Abs(filename) if pathErr != nil { absPath = filename } - level.Error(logger).Log("msg", "Error opening query log file", "file", absPath, "err", err) + logger.Error("Error opening query log file", "file", absPath, "err", err) return nil, nil, err } err = file.Truncate(int64(filesize)) if err != nil { file.Close() - level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) + logger.Error("Error setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { file.Close() - level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) + logger.Error("Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } - return fileAsBytes, 
&mmapedFile{f: file, m: fileAsBytes}, err + return fileAsBytes, &mmappedFile{f: file, m: fileAsBytes}, err } -func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { +func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger *slog.Logger) *ActiveQueryTracker { err := os.MkdirAll(localStoragePath, 0o777) if err != nil { - level.Error(logger).Log("msg", "Failed to create directory for logging active queries") + logger.Error("Failed to create directory for logging active queries") } filename, filesize := filepath.Join(localStoragePath, "queries.active"), 1+maxConcurrent*entrySize logUnfinishedQueries(filename, filesize, logger) - fileAsBytes, closer, err := getMMapedFile(filename, filesize, logger) + fileAsBytes, closer, err := getMMappedFile(filename, filesize, logger) if err != nil { panic("Unable to create mmap-ed active query log") } copy(fileAsBytes, "[") activeQueryTracker := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, closer: closer, getNextIndex: make(chan int, maxConcurrent), logger: logger, @@ -174,18 +173,18 @@ func trimStringByBytes(str string, size int) string { return string(bytesStr[:trimIndex]) } -func _newJSONEntry(query string, timestamp int64, logger log.Logger) []byte { +func _newJSONEntry(query string, timestamp int64, logger *slog.Logger) []byte { entry := Entry{query, timestamp} jsonEntry, err := json.Marshal(entry) if err != nil { - level.Error(logger).Log("msg", "Cannot create json of query", "query", query) + logger.Error("Cannot create json of query", "query", query) return []byte{} } return jsonEntry } -func newJSONEntry(query string, logger log.Logger) []byte { +func newJSONEntry(query string, logger *slog.Logger) []byte { timestamp := time.Now().Unix() minEntryJSON := _newJSONEntry("", timestamp, logger) @@ -206,14 +205,14 @@ func (tracker ActiveQueryTracker) GetMaxConcurrent() int { } func (tracker ActiveQueryTracker) Delete(insertIndex int) { - copy(tracker.mmapedFile[insertIndex:], strings.Repeat("\x00", entrySize)) + copy(tracker.mmappedFile[insertIndex:], strings.Repeat("\x00", entrySize)) tracker.getNextIndex <- insertIndex } func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int, error) { select { case i := <-tracker.getNextIndex: - fileBytes := tracker.mmapedFile + fileBytes := tracker.mmappedFile entry := newJSONEntry(query, tracker.logger) start, end := i, i+entrySize diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 7bd93781ec..eb06e513ef 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -26,7 +26,7 @@ import ( func TestQueryLogging(t *testing.T) { fileAsBytes := make([]byte, 4096) queryLogger := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, logger: nil, getNextIndex: make(chan int, 4), } @@ -70,7 +70,7 @@ func TestQueryLogging(t *testing.T) { func TestIndexReuse(t *testing.T) { queryBytes := make([]byte, 1+3*entrySize) queryLogger := ActiveQueryTracker{ - mmapedFile: queryBytes, + mmappedFile: queryBytes, logger: nil, getNextIndex: make(chan int, 3), } @@ -106,10 +106,10 @@ func TestIndexReuse(t *testing.T) { func TestMMapFile(t *testing.T) { dir := t.TempDir() - fpath := filepath.Join(dir, "mmapedFile") + fpath := filepath.Join(dir, "mmappedFile") const data = "ab" - fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) + fileAsBytes, closer, err := getMMappedFile(fpath, 2, nil) require.NoError(t, err) copy(fileAsBytes, data) 
require.NoError(t, closer.Close()) diff --git a/promql/value.go b/promql/value.go index f25dbcd780..f19c0b5b58 100644 --- a/promql/value.go +++ b/promql/value.go @@ -526,7 +526,7 @@ func (ssi *storageSeriesIterator) Next() chunkenc.ValueType { ssi.currH = p.H return chunkenc.ValFloatHistogram default: - panic("storageSeriesIterater.Next failed to pick value type") + panic("storageSeriesIterator.Next failed to pick value type") } } diff --git a/rules/alerting.go b/rules/alerting.go index 2dc0917dce..7e74c176aa 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -16,13 +16,12 @@ package rules import ( "context" "fmt" + "log/slog" "net/url" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -141,7 +140,7 @@ type AlertingRule struct { // the fingerprint of the labelset they correspond to. active map[uint64]*Alert - logger log.Logger + logger *slog.Logger noDependentRules *atomic.Bool noDependencyRules *atomic.Bool @@ -151,7 +150,7 @@ type AlertingRule struct { func NewAlertingRule( name string, vec parser.Expr, hold, keepFiringFor time.Duration, labels, annotations, externalLabels labels.Labels, externalURL string, - restored bool, logger log.Logger, + restored bool, logger *slog.Logger, ) *AlertingRule { el := externalLabels.Map() @@ -381,7 +380,7 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t result, err := tmpl.Expand() if err != nil { result = fmt.Sprintf("", err) - level.Warn(r.logger).Log("msg", "Expanding alert template failed", "err", err, "data", tmplData) + r.logger.Warn("Expanding alert template failed", "err", err, "data", tmplData) } return result } diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 67d683c851..f0aa339cc7 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -19,8 +19,8 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -276,7 +276,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalLabels := NewAlertingRule( "ExternalLabelExists", @@ -287,7 +287,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.FromStrings("foo", "bar", "dings", "bums"), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -371,7 +371,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalURL := NewAlertingRule( "ExternalURLExists", @@ -382,7 +382,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "http://localhost:1234", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -466,7 +466,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -527,7 +527,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; `), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, 
promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -607,7 +607,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) @@ -651,7 +651,7 @@ func TestAlertingRuleLimit(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -779,7 +779,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) { }, }, } - nm := notifier.NewManager(&opts, log.NewNopLogger()) + nm := notifier.NewManager(&opts, promslog.NewNopLogger()) f := SendAlerts(nm, "") notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { @@ -986,7 +986,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { @@ -1008,7 +1008,7 @@ func TestAlertingRule_SetNoDependentRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependentRules()) @@ -1029,7 +1029,7 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependencyRules()) diff --git a/rules/fixtures/rules1.yaml b/rules/fixtures/rules1.yaml new file mode 100644 index 0000000000..76fbf71f3b --- /dev/null +++ b/rules/fixtures/rules1.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_1 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/group.go b/rules/group.go index 201d3a67d7..7dd046b57a 100644 --- a/rules/group.go +++ b/rules/group.go @@ -16,6 +16,7 @@ package rules import ( "context" "errors" + "log/slog" "math" "slices" "strings" @@ -26,10 +27,9 @@ import ( "github.com/prometheus/prometheus/promql/parser" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -65,7 +65,7 @@ type Group struct { terminated chan struct{} managerDone chan struct{} - logger log.Logger + logger *slog.Logger metrics *Metrics @@ -75,6 +75,7 @@ type Group struct { // concurrencyController controls the rules evaluation concurrency. 
concurrencyController RuleConcurrencyController + appOpts *storage.AppendOptions } // GroupEvalIterationFunc is used to implement and extend rule group @@ -124,6 +125,10 @@ func NewGroup(o GroupOptions) *Group { concurrencyController = sequentialRuleEvalController{} } + if o.Opts.Logger == nil { + o.Opts.Logger = promslog.NewNopLogger() + } + return &Group{ name: o.Name, file: o.File, @@ -137,10 +142,11 @@ func NewGroup(o GroupOptions) *Group { done: make(chan struct{}), managerDone: o.done, terminated: make(chan struct{}), - logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name), + logger: o.Opts.Logger.With("file", o.File, "group", o.Name), metrics: metrics, evalIterationFunc: evalIterationFunc, concurrencyController: concurrencyController, + appOpts: &storage.AppendOptions{DiscardOutOfOrder: true}, } } @@ -188,7 +194,7 @@ func matchesMatcherSets(matcherSets [][]*labels.Matcher, lbls labels.Labels) boo return ok } -// Queryable returns the group's querable. +// Queryable returns the group's queryable. func (g *Group) Queryable() storage.Queryable { return g.opts.Queryable } // Context returns the group's context. @@ -200,7 +206,7 @@ func (g *Group) Interval() time.Duration { return g.interval } // Limit returns the group's limit. func (g *Group) Limit() int { return g.limit } -func (g *Group) Logger() log.Logger { return g.logger } +func (g *Group) Logger() *slog.Logger { return g.logger } func (g *Group) run(ctx context.Context) { defer close(g.terminated) @@ -272,7 +278,7 @@ func (g *Group) run(ctx context.Context) { g.RestoreForState(restoreStartTime) totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds() g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds) - level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) + g.logger.Debug("'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) g.shouldRestore = false } @@ -495,7 +501,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { defer cleanup() } - logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i) + logger := g.logger.With("name", rule.Name(), "index", i) ctx, sp := otel.Tracer("").Start(ctx, "rule") sp.SetAttributes(attribute.String("name", rule.Name())) defer func(t time.Time) { @@ -508,7 +514,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { }(time.Now()) if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() { - logger = log.WithPrefix(logger, "trace_id", sp.SpanContext().TraceID()) + logger = logger.With("trace_id", sp.SpanContext().TraceID()) } g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() @@ -524,7 +530,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // happens on shutdown and thus we skip logging of any errors here. 
var eqc promql.ErrQueryCanceled if !errors.As(err, &eqc) { - level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) + logger.Warn("Evaluating rule failed", "rule", rule, "err", err) } return } @@ -550,7 +556,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { sp.SetStatus(codes.Error, err.Error()) g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err) + logger.Warn("Rule sample appending failed", "err", err) return } g.seriesInPreviousEval[i] = seriesReturned @@ -560,6 +566,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if s.H != nil { _, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H) } else { + app.SetOptions(g.appOpts) _, err = app.Append(0, s.Metric, s.T, s.F) } @@ -574,15 +581,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { switch { case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample): numOutOfOrder++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrTooOldSample): numTooOld++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp): numDuplicates++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) default: - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) } } else { buf := [1024]byte{} @@ -590,13 +597,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { } } if numOutOfOrder > 0 { - level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) + logger.Warn("Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) } if numTooOld > 0 { - level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) + logger.Warn("Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) } if numDuplicates > 0 { - level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) + logger.Warn("Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) } for metric, lset := range g.seriesInPreviousEval[i] { @@ -615,7 +622,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err) + logger.Warn("Adding stale sample failed", "sample", lset.String(), "err", err) } } } @@ -656,6 +663,7 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { return } app := g.opts.Appendable.Appender(ctx) + app.SetOptions(g.appOpts) queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. 
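The appOpts wired through the group above make the rule appender discard out-of-order samples instead of ingesting them; rejected samples then surface through the storage.ErrOutOfOrderSample handling visible above. A minimal sketch of the same appender wiring in isolation (the helper name and plumbing are illustrative only; assume db is any storage.Appendable):

package example

import (
	"context"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
)

// appendDiscardingOOO appends one sample the way rule groups now do: with
// DiscardOutOfOrder set, an out-of-order sample is rejected (e.g. as
// storage.ErrOutOfOrderSample), so it can be counted and logged rather than
// accepted through the out-of-order ingestion path.
func appendDiscardingOOO(ctx context.Context, db storage.Appendable, lset labels.Labels, t int64, v float64) error {
	app := db.Appender(ctx)
	app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})
	if _, err := app.Append(0, lset, t, v); err != nil {
		_ = app.Rollback()
		return err
	}
	return app.Commit()
}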
@@ -672,11 +680,11 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err) + g.logger.Warn("Adding stale sample for previous configuration failed", "sample", s, "err", err) } } if err := app.Commit(); err != nil { - level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err) + g.logger.Warn("Stale sample appending for previous configuration failed", "err", err) } else { g.staleSeries = nil } @@ -691,12 +699,12 @@ func (g *Group) RestoreForState(ts time.Time) { mintMS := int64(model.TimeFromUnixNano(mint.UnixNano())) q, err := g.opts.Queryable.Querier(mintMS, maxtMS) if err != nil { - level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err) + g.logger.Error("Failed to get Querier", "err", err) return } defer func() { if err := q.Close(); err != nil { - level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err) + g.logger.Error("Failed to close Querier", "err", err) } }() @@ -717,8 +725,8 @@ func (g *Group) RestoreForState(ts time.Time) { sset, err := alertRule.QueryForStateSeries(g.opts.Context, q) if err != nil { - level.Error(g.logger).Log( - "msg", "Failed to restore 'for' state", + g.logger.Error( + "Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Select", "err", err, @@ -737,7 +745,7 @@ func (g *Group) RestoreForState(ts time.Time) { // No results for this alert rule. if len(seriesByLabels) == 0 { - level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + g.logger.Debug("No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) alertRule.SetRestored(true) continue } @@ -757,7 +765,7 @@ func (g *Group) RestoreForState(ts time.Time) { t, v = it.At() } if it.Err() != nil { - level.Error(g.logger).Log("msg", "Failed to restore 'for' state", + g.logger.Error("Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err()) return } @@ -799,7 +807,7 @@ func (g *Group) RestoreForState(ts time.Time) { } a.ActiveAt = restoredActiveAt - level.Debug(g.logger).Log("msg", "'for' state restored", + g.logger.Debug("'for' state restored", labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850), "labels", a.Labels.String()) }) diff --git a/rules/manager.go b/rules/manager.go index 9e5b33fbc9..6e9bf64691 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -17,15 +17,15 @@ import ( "context" "errors" "fmt" + "log/slog" "net/url" "slices" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "golang.org/x/sync/semaphore" "github.com/prometheus/prometheus/model/labels" @@ -96,7 +96,7 @@ type Manager struct { done chan struct{} restored bool - logger log.Logger + logger *slog.Logger } // NotifyFunc sends notifications about a set of alerts generated by the given expression. 
@@ -110,7 +110,7 @@ type ManagerOptions struct { Context context.Context Appendable storage.Appendable Queryable storage.Queryable - Logger log.Logger + Logger *slog.Logger Registerer prometheus.Registerer OutageTolerance time.Duration ForGracePeriod time.Duration @@ -148,6 +148,10 @@ func NewManager(o *ManagerOptions) *Manager { o.RuleDependencyController = ruleDependencyController{} } + if o.Logger == nil { + o.Logger = promslog.NewNopLogger() + } + m := &Manager{ groups: map[string]*Group{}, opts: o, @@ -161,7 +165,7 @@ func NewManager(o *ManagerOptions) *Manager { // Run starts processing of the rule manager. It is blocking. func (m *Manager) Run() { - level.Info(m.logger).Log("msg", "Starting rule manager...") + m.logger.Info("Starting rule manager...") m.start() <-m.done } @@ -175,7 +179,7 @@ func (m *Manager) Stop() { m.mtx.Lock() defer m.mtx.Unlock() - level.Info(m.logger).Log("msg", "Stopping rule manager...") + m.logger.Info("Stopping rule manager...") for _, eg := range m.groups { eg.stop() @@ -185,7 +189,7 @@ func (m *Manager) Stop() { // staleness markers. close(m.done) - level.Info(m.logger).Log("msg", "Rule manager stopped") + m.logger.Info("Rule manager stopped") } // Update the rule manager's state as the config requires. If @@ -206,7 +210,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels if errs != nil { for _, e := range errs { - level.Error(m.logger).Log("msg", "loading groups failed", "err", e) + m.logger.Error("loading groups failed", "err", e) } return errors.New("error loading rules, previous rule set restored") } @@ -312,25 +316,27 @@ func (m *Manager) LoadGroups( return nil, []error{fmt.Errorf("%s: %w", fn, err)} } + mLabels := FromMaps(rg.Labels, r.Labels) + if r.Alert.Value != "" { rules = append(rules, NewAlertingRule( r.Alert.Value, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), - labels.FromMap(r.Labels), + mLabels, labels.FromMap(r.Annotations), externalLabels, externalURL, m.restored, - log.With(m.logger, "alert", r.Alert), + m.logger.With("alert", r.Alert), )) continue } rules = append(rules, NewRecordingRule( r.Record.Value, expr, - labels.FromMap(r.Labels), + mLabels, )) } @@ -501,3 +507,16 @@ func (c sequentialRuleEvalController) Allow(_ context.Context, _ *Group, _ Rule) } func (c sequentialRuleEvalController) Done(_ context.Context) {} + +// FromMaps returns new sorted Labels from the given maps, overriding each other in order. 
+func FromMaps(maps ...map[string]string) labels.Labels {
+	mLabels := make(map[string]string)
+
+	for _, m := range maps {
+		for k, v := range m {
+			mLabels[k] = v
+		}
+	}
+
+	return labels.FromMap(mLabels)
+}
diff --git a/rules/manager_test.go b/rules/manager_test.go
index b9f6db3273..6afac993d8 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -26,10 +26,10 @@ import (
 	"testing"
 	"time"
 
-	"github.com/go-kit/log"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/testutil"
 	"github.com/prometheus/common/model"
+	"github.com/prometheus/common/promslog"
 	"github.com/stretchr/testify/require"
 	"go.uber.org/atomic"
 	"gopkg.in/yaml.v2"
@@ -374,7 +374,7 @@ func TestForStateRestore(t *testing.T) {
 		Appendable:      storage,
 		Queryable:       storage,
 		Context:         context.Background(),
-		Logger:          log.NewNopLogger(),
+		Logger:          promslog.NewNopLogger(),
 		NotifyFunc:      func(ctx context.Context, expr string, alerts ...*Alert) {},
 		OutageTolerance: 30 * time.Minute,
 		ForGracePeriod:  10 * time.Minute,
@@ -547,7 +547,7 @@ func TestStaleness(t *testing.T) {
 		Appendable: st,
 		Queryable:  st,
 		Context:    context.Background(),
-		Logger:     log.NewNopLogger(),
+		Logger:     promslog.NewNopLogger(),
 	}
 
 	expr, err := parser.ParseExpr("a + 1")
@@ -641,7 +641,7 @@ groups:
 	require.NoError(t, err)
 
 	m := NewManager(&ManagerOptions{
-		Logger: log.NewNopLogger(),
+		Logger: promslog.NewNopLogger(),
 		DefaultRuleQueryOffset: func() time.Duration {
 			return time.Minute
 		},
@@ -781,7 +781,7 @@ func TestUpdate(t *testing.T) {
 		Queryable: st,
 		QueryFunc: EngineQueryFunc(engine, st),
 		Context:   context.Background(),
-		Logger:    log.NewNopLogger(),
+		Logger:    promslog.NewNopLogger(),
 	})
 	ruleManager.start()
 	defer ruleManager.Stop()
@@ -855,10 +855,11 @@ type ruleGroupsTest struct {
 
 // ruleGroupTest forms a testing struct for running tests over rules.
type ruleGroupTest struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []rulefmt.Rule `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []rulefmt.Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { @@ -881,6 +882,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { Interval: g.Interval, Limit: g.Limit, Rules: rtmp, + Labels: g.Labels, }) } return ruleGroupsTest{ @@ -923,14 +925,14 @@ func TestNotify(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: notifyFunc, ResendDelay: 2 * time.Second, } expr, err := parser.ParseExpr("a > 1") require.NoError(t, err) - rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) group := NewGroup(GroupOptions{ Name: "alert", Interval: time.Second, @@ -994,7 +996,7 @@ func TestMetricsUpdate(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Registerer: registry, }) ruleManager.start() @@ -1068,7 +1070,7 @@ func TestGroupStalenessOnRemoval(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1145,7 +1147,7 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1193,6 +1195,53 @@ func countStaleNaN(t *testing.T, st storage.Storage) int { return c } +func TestRuleMovedBetweenGroups(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + storage := teststorage.New(t, 600000) + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + Queryable: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: promslog.NewNopLogger(), + }) + var stopped bool + ruleManager.start() + defer func() { + if !stopped { + ruleManager.Stop() + } + }() + + rule2 := "fixtures/rules2.yaml" + rule1 := "fixtures/rules1.yaml" + + // Load initial configuration of rules2 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule2}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated + time.Sleep(3 * time.Second) + + // Reload configuration of rules1 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule1}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated in new location and potential staleness marker + time.Sleep(3 * time.Second) + + require.Equal(t, 0, countStaleNaN(t, storage)) // Not expecting any stale markers. 
+} + func TestGroupHasAlertingRules(t *testing.T) { tests := []struct { group *Group @@ -1247,7 +1296,7 @@ func TestRuleHealthUpdates(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("a + 1") @@ -1345,7 +1394,7 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, OutageTolerance: 30 * time.Minute, ForGracePeriod: 10 * time.Minute, @@ -1431,7 +1480,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum(histogram_metric)") @@ -1479,7 +1528,7 @@ func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependenci ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Appendable: storage, QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { return nil, nil }, }) @@ -1535,7 +1584,7 @@ func TestDependencyMap(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1544,7 +1593,7 @@ func TestDependencyMap(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("sum by (user) (rate(requests[5m]))") require.NoError(t, err) @@ -1584,7 +1633,7 @@ func TestNoDependency(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1607,7 +1656,7 @@ func TestDependenciesEdgeCases(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } t.Run("empty group", func(t *testing.T) { @@ -1765,7 +1814,7 @@ func TestNoMetricSelector(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1794,7 +1843,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1803,7 +1852,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, 
labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("3") require.NoError(t, err) @@ -1826,7 +1875,7 @@ func TestRulesDependentOnMetaMetrics(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } // This rule is not dependent on any other rules in its group but it does depend on `ALERTS`, which is produced by @@ -1855,7 +1904,7 @@ func TestDependencyMapUpdatesOnGroupUpdate(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() @@ -2107,7 +2156,7 @@ func TestUpdateWhenStopped(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() err := ruleManager.Update(10*time.Second, files, labels.EmptyLabels(), "", nil) @@ -2129,7 +2178,7 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I return &ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), ConcurrentEvalsEnabled: concurrent, MaxConcurrentEvals: maxConcurrent, Appendable: storage, @@ -2158,3 +2207,18 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I }, } } + +func TestLabels_FromMaps(t *testing.T) { + mLabels := FromMaps( + map[string]string{"aaa": "101", "bbb": "222"}, + map[string]string{"aaa": "111", "ccc": "333"}, + ) + + expected := labels.New( + labels.Label{Name: "aaa", Value: "111"}, + labels.Label{Name: "bbb", Value: "222"}, + labels.Label{Name: "ccc", Value: "333"}, + ) + + require.Equal(t, expected, mLabels, "unexpected labelset") +} diff --git a/rules/origin_test.go b/rules/origin_test.go index 75c83f9a4e..0bf428f3c1 100644 --- a/rules/origin_test.go +++ b/rules/origin_test.go @@ -19,9 +19,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -96,7 +97,7 @@ func TestNewRuleDetail(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) detail := NewRuleDetail(rule) diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index 116fa5c94b..12a56d7071 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -43,6 +43,8 @@ func (a nopAppendable) Appender(_ context.Context) storage.Appender { type nopAppender struct{} +func (a nopAppender) SetOptions(opts *storage.AppendOptions) {} + func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { return 0, nil } @@ -55,6 +57,10 @@ func (a nopAppender) AppendHistogram(storage.SeriesRef, labels.Labels, int64, *h return 0, nil } +func (a nopAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, nil +} + func (a nopAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { return 0, nil } @@ -78,9 +84,10 @@ func equalFloatSamples(a, b floatSample) bool { } type histogramSample 
struct { - t int64 - h *histogram.Histogram - fh *histogram.FloatHistogram + metric labels.Labels + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram } type collectResultAppendable struct { @@ -109,6 +116,8 @@ type collectResultAppender struct { pendingMetadata []metadata.Metadata } +func (a *collectResultAppender) SetOptions(opts *storage.AppendOptions) {} + func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() @@ -146,7 +155,7 @@ func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.L func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() - a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t, metric: l}) if a.next == nil { return 0, nil } @@ -154,6 +163,13 @@ func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels. return a.next.AppendHistogram(ref, l, t, h, fh) } +func (a *collectResultAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + return a.AppendHistogram(ref, l, ct, &histogram.Histogram{}, nil) + } + return a.AppendHistogram(ref, l, ct, nil, &histogram.FloatHistogram{}) +} + func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() diff --git a/scrape/manager.go b/scrape/manager.go index d7786a082b..f3dad2a048 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -17,32 +17,32 @@ import ( "errors" "fmt" "hash/fnv" - "io" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/osutil" "github.com/prometheus/prometheus/util/pool" ) // NewManager is the Manager constructor. -func NewManager(o *Options, logger log.Logger, newScrapeFailureLogger func(string) (log.Logger, error), app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { +func NewManager(o *Options, logger *slog.Logger, newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error), app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { if o == nil { o = &Options{} } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } sm, err := newScrapeMetrics(registerer) @@ -70,8 +70,7 @@ func NewManager(o *Options, logger log.Logger, newScrapeFailureLogger func(strin // Options are the configuration parameters to the scrape manager. type Options struct { - ExtraMetrics bool - NoDefaultPort bool + ExtraMetrics bool // Option used by downstream scraper users like OpenTelemetry Collector // to help lookup metric metadata. Should be false for Prometheus. 
PassMetadataInContext bool @@ -101,7 +100,7 @@ const DefaultNameEscapingScheme = model.ValueEncodingEscaping // when receiving new target groups from the discovery manager. type Manager struct { opts *Options - logger log.Logger + logger *slog.Logger append storage.Appendable graceShut chan struct{} @@ -109,8 +108,8 @@ type Manager struct { mtxScrape sync.Mutex // Guards the fields below. scrapeConfigs map[string]*config.ScrapeConfig scrapePools map[string]*scrapePool - newScrapeFailureLogger func(string) (log.Logger, error) - scrapeFailureLoggers map[string]log.Logger + newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error) + scrapeFailureLoggers map[string]*logging.JSONFileLogger targetSets map[string][]*targetgroup.Group buffers *pool.Pool @@ -176,21 +175,26 @@ func (m *Manager) reload() { if _, ok := m.scrapePools[setName]; !ok { scrapeConfig, ok := m.scrapeConfigs[setName] if !ok { - level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName) + m.logger.Error("error reloading target set", "err", "invalid config id:"+setName) + continue + } + if scrapeConfig.ConvertClassicHistogramsToNHCB && m.opts.EnableCreatedTimestampZeroIngestion { + // TODO(krajorama): fix https://github.com/prometheus/prometheus/issues/15137 + m.logger.Error("error reloading target set", "err", "cannot convert classic histograms to native histograms with custom buckets and ingest created timestamp zero samples at the same time due to https://github.com/prometheus/prometheus/issues/15137") continue } m.metrics.targetScrapePools.Inc() - sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.buffers, m.opts, m.metrics) + sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) if err != nil { m.metrics.targetScrapePoolsFailed.Inc() - level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName) + m.logger.Error("error creating new scrape pool", "err", err, "scrape_pool", setName) continue } m.scrapePools[setName] = sp if l, ok := m.scrapeFailureLoggers[scrapeConfig.ScrapeFailureLogFile]; ok { sp.SetScrapeFailureLogger(l) } else { - level.Error(sp.logger).Log("msg", "No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", setName) + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", setName) } } @@ -247,7 +251,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { } c := make(map[string]*config.ScrapeConfig) - scrapeFailureLoggers := map[string]log.Logger{ + scrapeFailureLoggers := map[string]*logging.JSONFileLogger{ "": nil, // Emptying the file name sets the scrape logger to nil. } for _, scfg := range scfgs { @@ -255,23 +259,23 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { if _, ok := scrapeFailureLoggers[scfg.ScrapeFailureLogFile]; !ok { // We promise to reopen the file on each reload. 
var ( - l log.Logger - err error + logger *logging.JSONFileLogger + err error ) if m.newScrapeFailureLogger != nil { - if l, err = m.newScrapeFailureLogger(scfg.ScrapeFailureLogFile); err != nil { + if logger, err = m.newScrapeFailureLogger(scfg.ScrapeFailureLogFile); err != nil { return err } } - scrapeFailureLoggers[scfg.ScrapeFailureLogFile] = l + scrapeFailureLoggers[scfg.ScrapeFailureLogFile] = logger } } m.scrapeConfigs = c oldScrapeFailureLoggers := m.scrapeFailureLoggers for _, s := range oldScrapeFailureLoggers { - if closer, ok := s.(io.Closer); ok { - defer closer.Close() + if s != nil { + defer s.Close() } } @@ -291,7 +295,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { case !reflect.DeepEqual(sp.config, cfg): err := sp.reload(cfg) if err != nil { - level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name) + m.logger.Error("error reloading scrape pool", "err", err, "scrape_pool", name) failed = true } fallthrough @@ -299,7 +303,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { if l, ok := m.scrapeFailureLoggers[cfg.ScrapeFailureLogFile]; ok { sp.SetScrapeFailureLogger(l) } else { - level.Error(sp.logger).Log("msg", "No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) } } } diff --git a/scrape/manager_test.go b/scrape/manager_test.go index 3a1d1e1ddd..c3544f6344 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -14,6 +14,7 @@ package scrape import ( + "bytes" "context" "fmt" "net/http" @@ -26,21 +27,26 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/timestamppb" "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/runutil" "github.com/prometheus/prometheus/util/testutil" ) @@ -52,12 +58,11 @@ func init() { func TestPopulateLabels(t *testing.T) { cases := []struct { - in labels.Labels - cfg *config.ScrapeConfig - noDefaultPort bool - res labels.Labels - resOrig labels.Labels - err string + in labels.Labels + cfg *config.ScrapeConfig + res labels.Labels + resOrig labels.Labels + err string }{ // Regular population of scrape config options. 
{ @@ -111,8 +116,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4:80", - model.InstanceLabel: "1.2.3.4:80", + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4", model.SchemeLabel: "http", model.MetricsPathLabel: "/custom", model.JobLabel: "custom-job", @@ -142,7 +147,7 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "[::1]:443", + model.AddressLabel: "[::1]", model.InstanceLabel: "custom-instance", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -365,7 +370,6 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4", model.InstanceLabel: "1.2.3.4", @@ -384,7 +388,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (http). + // verify that the default port is not removed (http). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:80", @@ -396,9 +400,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:80", model.InstanceLabel: "1.2.3.4:80", model.SchemeLabel: "http", model.MetricsPathLabel: "/metrics", @@ -415,7 +418,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (https). + // verify that the default port is not removed (https). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:443", @@ -427,9 +430,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:443", model.InstanceLabel: "1.2.3.4:443", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -450,7 +452,7 @@ func TestPopulateLabels(t *testing.T) { for _, c := range cases { in := c.in.Copy() - res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg, c.noDefaultPort) + res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg) if c.err != "" { require.EqualError(t, err, c.err) } else { @@ -721,37 +723,256 @@ scrape_configs: require.ElementsMatch(t, []string{"job1", "job3"}, scrapeManager.ScrapePools()) } -// TestManagerCTZeroIngestion tests scrape manager for CT cases. +func setupScrapeManager(t *testing.T, honorTimestamps, enableCTZeroIngestion bool) (*collectResultAppender, *Manager) { + app := &collectResultAppender{} + scrapeManager, err := NewManager( + &Options{ + EnableCreatedTimestampZeroIngestion: enableCTZeroIngestion, + skipOffsetting: true, + }, + promslog.New(&promslog.Config{}), + nil, + &collectResultAppendable{app}, + prometheus.NewRegistry(), + ) + require.NoError(t, err) + + require.NoError(t, scrapeManager.ApplyConfig(&config.Config{ + GlobalConfig: config.GlobalConfig{ + // Disable regular scrapes. 
+			ScrapeInterval:  model.Duration(9999 * time.Minute),
+			ScrapeTimeout:   model.Duration(5 * time.Second),
+			ScrapeProtocols: []config.ScrapeProtocol{config.OpenMetricsText1_0_0, config.PrometheusProto},
+		},
+		ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test", HonorTimestamps: honorTimestamps}},
+	}))
+
+	return app, scrapeManager
+}
+
+func setupTestServer(t *testing.T, typ string, toWrite []byte) *httptest.Server {
+	once := sync.Once{}
+
+	server := httptest.NewServer(
+		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			fail := true
+			once.Do(func() {
+				fail = false
+				w.Header().Set("Content-Type", typ)
+				w.Write(toWrite)
+			})
+
+			if fail {
+				w.WriteHeader(http.StatusInternalServerError)
+			}
+		}),
+	)
+
+	t.Cleanup(func() { server.Close() })
+
+	return server
+}
+
+// TestManagerCTZeroIngestion tests the scrape manager for various CT cases.
 func TestManagerCTZeroIngestion(t *testing.T) {
-	const mName = "expected_counter"
+	const (
+		// The _total suffix is required, otherwise expfmt with OMText will mark the metric as "unknown".
+		expectedMetricName        = "expected_metric_total"
+		expectedCreatedMetricName = "expected_metric_created"
+		expectedSampleValue       = 17.0
+	)
+
+	for _, testFormat := range []config.ScrapeProtocol{config.PrometheusProto, config.OpenMetricsText1_0_0} {
+		t.Run(fmt.Sprintf("format=%s", testFormat), func(t *testing.T) {
+			for _, testWithCT := range []bool{false, true} {
+				t.Run(fmt.Sprintf("withCT=%v", testWithCT), func(t *testing.T) {
+					for _, testCTZeroIngest := range []bool{false, true} {
+						t.Run(fmt.Sprintf("ctZeroIngest=%v", testCTZeroIngest), func(t *testing.T) {
+							sampleTs := time.Now()
+							ctTs := time.Time{}
+							if testWithCT {
+								ctTs = sampleTs.Add(-2 * time.Minute)
+							}
+
+							// TODO(bwplotka): Add more types than just counter?
+							encoded := prepareTestEncodedCounter(t, testFormat, expectedMetricName, expectedSampleValue, sampleTs, ctTs)
+							app, scrapeManager := setupScrapeManager(t, true, testCTZeroIngest)
+
+							// Perform the test.
+							doOneScrape(t, scrapeManager, app, setupTestServer(t, config.ScrapeProtocolsHeaders[testFormat], encoded))
+
+							// Verify results.
+							// Check what we got against expectations around CT injection.
+							samples := findSamplesForMetric(app.resultFloats, expectedMetricName)
+							if testWithCT && testCTZeroIngest {
+								require.Len(t, samples, 2)
+								require.Equal(t, 0.0, samples[0].f)
+								require.Equal(t, timestamp.FromTime(ctTs), samples[0].t)
+								require.Equal(t, expectedSampleValue, samples[1].f)
+								require.Equal(t, timestamp.FromTime(sampleTs), samples[1].t)
+							} else {
+								require.Len(t, samples, 1)
+								require.Equal(t, expectedSampleValue, samples[0].f)
+								require.Equal(t, timestamp.FromTime(sampleTs), samples[0].t)
+							}
+
+							// Check what we got against expectations around the additional _created series for OM text.
+							// Enabling CT zero ingestion also drops that _created line.
+							createdSeriesSamples := findSamplesForMetric(app.resultFloats, expectedCreatedMetricName)
+							if testFormat == config.OpenMetricsText1_0_0 && testWithCT && !testCTZeroIngest {
+								// For OM text, when the counter has a CT and the feature flag is disabled, we should see _created lines.
+								require.Len(t, createdSeriesSamples, 1)
+								// Conversion taken from common/expfmt.writeOpenMetricsFloat.
+ // We don't check the ct timestamp as explicit ts was not implemented in expfmt.Encoder, + // but exists in OM https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#:~:text=An%20example%20with%20a%20Metric%20with%20no%20labels%2C%20and%20a%20MetricPoint%20with%20a%20timestamp%20and%20a%20created + // We can implement this, but we want to potentially get rid of OM 1.0 CT lines + require.Equal(t, float64(timestamppb.New(ctTs).AsTime().UnixNano())/1e9, createdSeriesSamples[0].f) + } else { + require.Empty(t, createdSeriesSamples) + } + }) + } + }) + } + }) + } +} + +func prepareTestEncodedCounter(t *testing.T, format config.ScrapeProtocol, mName string, v float64, ts, ct time.Time) (encoded []byte) { + t.Helper() + + counter := &dto.Counter{Value: proto.Float64(v)} + if !ct.IsZero() { + counter.CreatedTimestamp = timestamppb.New(ct) + } + ctrType := dto.MetricType_COUNTER + inputMetric := &dto.MetricFamily{ + Name: proto.String(mName), + Type: &ctrType, + Metric: []*dto.Metric{{ + TimestampMs: proto.Int64(timestamp.FromTime(ts)), + Counter: counter, + }}, + } + switch format { + case config.PrometheusProto: + return protoMarshalDelimited(t, inputMetric) + case config.OpenMetricsText1_0_0: + buf := &bytes.Buffer{} + require.NoError(t, expfmt.NewEncoder(buf, expfmt.NewFormat(expfmt.TypeOpenMetrics), expfmt.WithCreatedLines(), expfmt.WithUnit()).Encode(inputMetric)) + _, _ = buf.WriteString("# EOF") + + t.Log("produced OM text to expose:", buf.String()) + return buf.Bytes() + default: + t.Fatalf("not implemented format: %v", format) + return nil + } +} + +func doOneScrape(t *testing.T, manager *Manager, appender *collectResultAppender, server *httptest.Server) { + t.Helper() + + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + + // Add fake target directly into tsets + reload + manager.updateTsets(map[string][]*targetgroup.Group{ + "test": {{ + Targets: []model.LabelSet{{ + model.SchemeLabel: model.LabelValue(serverURL.Scheme), + model.AddressLabel: model.LabelValue(serverURL.Host), + }}, + }}, + }) + manager.reload() + + // Wait for one scrape. + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { + appender.mtx.Lock() + defer appender.mtx.Unlock() + + // Check if scrape happened and grab the relevant samples. + if len(appender.resultFloats) > 0 { + return nil + } + return fmt.Errorf("expected some float samples, got none") + }), "after 1 minute") + manager.Stop() +} + +func findSamplesForMetric(floats []floatSample, metricName string) (ret []floatSample) { + for _, f := range floats { + if f.metric.Get(model.MetricNameLabel) == metricName { + ret = append(ret, f) + } + } + return ret +} + +// generateTestHistogram generates the same thing as tsdbutil.GenerateTestHistogram, +// but in the form of dto.Histogram. 
+func generateTestHistogram(i int) *dto.Histogram { + helper := tsdbutil.GenerateTestHistogram(i) + h := &dto.Histogram{} + h.SampleCount = proto.Uint64(helper.Count) + h.SampleSum = proto.Float64(helper.Sum) + h.Schema = proto.Int32(helper.Schema) + h.ZeroThreshold = proto.Float64(helper.ZeroThreshold) + h.ZeroCount = proto.Uint64(helper.ZeroCount) + h.PositiveSpan = make([]*dto.BucketSpan, len(helper.PositiveSpans)) + for i, span := range helper.PositiveSpans { + h.PositiveSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.PositiveDelta = helper.PositiveBuckets + h.NegativeSpan = make([]*dto.BucketSpan, len(helper.NegativeSpans)) + for i, span := range helper.NegativeSpans { + h.NegativeSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.NegativeDelta = helper.NegativeBuckets + return h +} + +func TestManagerCTZeroIngestionHistogram(t *testing.T) { + const mName = "expected_histogram" for _, tc := range []struct { name string - counterSample *dto.Counter + inputHistSample *dto.Histogram enableCTZeroIngestion bool }{ { - name: "disabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "disabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), + enableCTZeroIngestion: false, }, { - name: "enabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "enabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), enableCTZeroIngestion: true, }, { - name: "enabled without CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - }, + name: "enabled without CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + return h + }(), enableCTZeroIngestion: true, }, } { @@ -760,9 +981,10 @@ func TestManagerCTZeroIngestion(t *testing.T) { scrapeManager, err := NewManager( &Options{ EnableCreatedTimestampZeroIngestion: tc.enableCTZeroIngestion, + EnableNativeHistogramsIngestion: true, skipOffsetting: true, }, - log.NewLogfmtLogger(os.Stderr), + promslog.New(&promslog.Config{}), nil, &collectResultAppendable{app}, prometheus.NewRegistry(), @@ -785,16 +1007,16 @@ func TestManagerCTZeroIngestion(t *testing.T) { // Start fake HTTP target to that allow one scrape only. server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fail := true + fail := true // TODO(bwplotka): Kill or use? once.Do(func() { fail = false w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) - ctrType := dto.MetricType_COUNTER + ctrType := dto.MetricType_HISTOGRAM w.Write(protoMarshalDelimited(t, &dto.MetricFamily{ Name: proto.String(mName), Type: &ctrType, - Metric: []*dto.Metric{{Counter: tc.counterSample}}, + Metric: []*dto.Metric{{Histogram: tc.inputHistSample}}, })) }) @@ -820,7 +1042,8 @@ func TestManagerCTZeroIngestion(t *testing.T) { }) scrapeManager.reload() - var got []float64 + var got []histogramSample + // Wait for one scrape. 
 			ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
 			defer cancel()
 
@@ -828,32 +1051,35 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 				app.mtx.Lock()
 				defer app.mtx.Unlock()
 
-				// Check if scrape happened and grab the relevant samples, they have to be there - or it's a bug
+				// Check if the scrape happened and grab the relevant histograms; they have to be there - or it's a bug
 				// and it's not worth waiting.
-				for _, f := range app.resultFloats {
-					if f.metric.Get(model.MetricNameLabel) == mName {
-						got = append(got, f.f)
+				for _, h := range app.resultHistograms {
+					if h.metric.Get(model.MetricNameLabel) == mName {
+						got = append(got, h)
 					}
 				}
-				if len(app.resultFloats) > 0 {
+				if len(app.resultHistograms) > 0 {
 					return nil
 				}
-				return fmt.Errorf("expected some samples, got none")
+				return fmt.Errorf("expected some histogram samples, got none")
 			}), "after 1 minute")
 			scrapeManager.Stop()
 
-			// Check for zero samples, assuming we only injected always one sample.
+			// Check for zero samples, assuming we always injected only one histogram sample.
 			// Did it contain CT to inject? If yes, was CT zero enabled?
-			if tc.counterSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion {
+			if tc.inputHistSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion {
 				require.Len(t, got, 2)
-				require.Equal(t, 0.0, got[0])
-				require.Equal(t, tc.counterSample.GetValue(), got[1])
+				// Zero sample.
+				require.Equal(t, histogram.Histogram{}, *got[0].h)
+				// Quick soft check to make sure it's the same sample or at least not zero.
+				require.Equal(t, tc.inputHistSample.GetSampleSum(), got[1].h.Sum)
 				return
 			}
 
 			// Expect only one, valid sample.
 			require.Len(t, got, 1)
-			require.Equal(t, tc.counterSample.GetValue(), got[0])
+			// Quick soft check to make sure it's the same sample or at least not zero.
+			require.Equal(t, tc.inputHistSample.GetSampleSum(), got[0].h.Sum)
 		})
 	}
 }
@@ -899,7 +1125,7 @@ func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manage
 	require.NoError(t, err)
 	discoveryManager := discovery.NewManager(
 		ctx,
-		log.NewNopLogger(),
+		promslog.NewNopLogger(),
 		reg,
 		sdMetrics,
 		discovery.Updatert(100*time.Millisecond),
@@ -1186,7 +1412,7 @@ scrape_configs:
 }
 
 // TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when one of them should no
-// longer discover targets, only the stale targets of that provier are dropped.
+// longer discover targets, only the stale targets of that provider are dropped.
 func TestOnlyStaleTargetsAreDropped(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
diff --git a/scrape/scrape.go b/scrape/scrape.go
index a64090e18c..7e270bb3a3 100644
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@@ -20,6 +20,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"math"
 	"net/http"
 	"reflect"
@@ -29,11 +30,10 @@ import (
 	"sync"
 	"time"
 
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
 	"github.com/klauspost/compress/gzip"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
+	"github.com/prometheus/common/promslog"
 	"github.com/prometheus/common/version"
 
 	"github.com/prometheus/prometheus/config"
@@ -47,6 +47,7 @@ import (
 	"github.com/prometheus/prometheus/model/timestamp"
 	"github.com/prometheus/prometheus/model/value"
 	"github.com/prometheus/prometheus/storage"
+	"github.com/prometheus/prometheus/util/logging"
 	"github.com/prometheus/prometheus/util/pool"
 )
 
@@ -63,7 +64,7 @@ var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels.
// scrapePool manages scrapes for sets of targets. type scrapePool struct { appendable storage.Appendable - logger log.Logger + logger *slog.Logger cancel context.CancelFunc httpOpts []config_util.HTTPClientOption @@ -87,11 +88,9 @@ type scrapePool struct { // Constructor for new scrape loops. This is settable for testing convenience. newLoop func(scrapeLoopOptions) loop - noDefaultPort bool - metrics *scrapeMetrics - scrapeFailureLogger log.Logger + scrapeFailureLogger *logging.JSONFileLogger scrapeFailureLoggerMtx sync.RWMutex } @@ -113,8 +112,10 @@ type scrapeLoopOptions struct { trackTimestampsStaleness bool interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string mrc []*relabel.Config cache *scrapeCache @@ -126,9 +127,9 @@ const maxAheadTime = 10 * time.Minute // returning an empty label set is interpreted as "drop". type labelsMutator func(labels.Labels) labels.Labels -func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { +func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger *slog.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...) @@ -149,7 +150,6 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed logger: logger, metrics: metrics, httpOpts: options.HTTPClientOptions, - noDefaultPort: options.NoDefaultPort, } sp.newLoop = func(opts scrapeLoopOptions) loop { // Update the targets retrieval function for metadata to a new scrape cache. 
@@ -162,7 +162,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed return newScrapeLoop( ctx, opts.scraper, - log.With(logger, "target", opts.target), + logger.With("target", opts.target), buffers, func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc) @@ -181,7 +181,8 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.labelLimits, opts.interval, opts.timeout, - opts.scrapeClassicHistograms, + opts.alwaysScrapeClassicHist, + opts.convertClassicHistToNHCB, options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, @@ -191,6 +192,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed metrics, options.skipOffsetting, opts.validationScheme, + opts.fallbackScrapeProtocol, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -221,11 +223,11 @@ func (sp *scrapePool) DroppedTargetsCount() int { return sp.droppedTargetsCount } -func (sp *scrapePool) SetScrapeFailureLogger(l log.Logger) { +func (sp *scrapePool) SetScrapeFailureLogger(l *logging.JSONFileLogger) { sp.scrapeFailureLoggerMtx.Lock() defer sp.scrapeFailureLoggerMtx.Unlock() if l != nil { - l = log.With(l, "job_name", sp.config.JobName) + l.With("job_name", sp.config.JobName) } sp.scrapeFailureLogger = l @@ -236,7 +238,7 @@ func (sp *scrapePool) SetScrapeFailureLogger(l log.Logger) { } } -func (sp *scrapePool) getScrapeFailureLogger() log.Logger { +func (sp *scrapePool) getScrapeFailureLogger() *logging.JSONFileLogger { sp.scrapeFailureLoggerMtx.RLock() defer sp.scrapeFailureLoggerMtx.RUnlock() return sp.scrapeFailureLogger @@ -327,6 +329,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() ) validationScheme := model.UTF8Validation @@ -373,6 +376,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { interval: interval, timeout: timeout, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) ) if err != nil { @@ -429,9 +433,9 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { sp.droppedTargets = []*Target{} sp.droppedTargetsCount = 0 for _, tg := range tgs { - targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) + targets, failures := TargetsFromGroup(tg, sp.config, targets, lb) for _, err := range failures { - level.Error(sp.logger).Log("msg", "Creating target failed", "err", err) + sp.logger.Error("Creating target failed", "err", err) } sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures))) for _, t := range targets { @@ -482,7 +486,9 @@ func (sp *scrapePool) sync(targets []*Target) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs - scrapeClassicHistograms = sp.config.ScrapeClassicHistograms + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() + alwaysScrapeClassicHist = sp.config.AlwaysScrapeClassicHistograms + convertClassicHistToNHCB = sp.config.ConvertClassicHistogramsToNHCB ) validationScheme := model.UTF8Validation @@ -523,7 +529,10 @@ func (sp *scrapePool) sync(targets []*Target) { mrc: mrc, interval: 
interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, + validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) if err != nil { l.setForcedError(err) @@ -851,7 +860,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w type loop interface { run(errc chan<- error) setForcedError(err error) - setScrapeFailureLogger(log.Logger) + setScrapeFailureLogger(*logging.JSONFileLogger) stop() getCache() *scrapeCache disableEndOfRunStalenessMarkers() @@ -866,8 +875,8 @@ type cacheEntry struct { type scrapeLoop struct { scraper scraper - l log.Logger - scrapeFailureLogger log.Logger + l *slog.Logger + scrapeFailureLogger *logging.JSONFileLogger scrapeFailureLoggerMtx sync.RWMutex cache *scrapeCache lastScrapeSize int @@ -884,8 +893,10 @@ type scrapeLoop struct { labelLimits *labelLimits interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string // Feature flagged options. enableNativeHistogramIngestion bool @@ -1167,7 +1178,7 @@ func (c *scrapeCache) LengthMetadata() int { func newScrapeLoop(ctx context.Context, sc scraper, - l log.Logger, + l *slog.Logger, buffers *pool.Pool, sampleMutator labelsMutator, reportSampleMutator labelsMutator, @@ -1184,7 +1195,8 @@ func newScrapeLoop(ctx context.Context, labelLimits *labelLimits, interval time.Duration, timeout time.Duration, - scrapeClassicHistograms bool, + alwaysScrapeClassicHist bool, + convertClassicHistToNHCB bool, enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, @@ -1194,9 +1206,10 @@ func newScrapeLoop(ctx context.Context, metrics *scrapeMetrics, skipOffsetting bool, validationScheme model.ValidationScheme, + fallbackScrapeProtocol string, ) *scrapeLoop { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if buffers == nil { buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }) @@ -1238,7 +1251,8 @@ func newScrapeLoop(ctx context.Context, labelLimits: labelLimits, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, enableNativeHistogramIngestion: enableNativeHistogramIngestion, enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, @@ -1246,17 +1260,18 @@ func newScrapeLoop(ctx context.Context, metrics: metrics, skipOffsetting: skipOffsetting, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, } sl.ctx, sl.cancel = context.WithCancel(ctx) return sl } -func (sl *scrapeLoop) setScrapeFailureLogger(l log.Logger) { +func (sl *scrapeLoop) setScrapeFailureLogger(l *logging.JSONFileLogger) { sl.scrapeFailureLoggerMtx.Lock() defer sl.scrapeFailureLoggerMtx.Unlock() if ts, ok := sl.scraper.(fmt.Stringer); ok && l != nil { - l = log.With(l, "target", ts.String()) + l.With("target", ts.String()) } sl.scrapeFailureLogger = l } @@ -1354,13 +1369,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } err = app.Commit() if err != nil { - level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err) + sl.l.Error("Scrape commit failed", "err", err) } }() defer func() { if err = 
sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytesRead, scrapeErr); err != nil { - level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err) + sl.l.Warn("Appending scrape report failed", "err", err) } }() @@ -1370,7 +1385,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } if errc != nil { errc <- forcedErr @@ -1403,10 +1418,10 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } bytesRead = len(b) } else { - level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr) + sl.l.Debug("Scrape failed", "err", scrapeErr) sl.scrapeFailureLoggerMtx.RLock() if sl.scrapeFailureLogger != nil { - sl.scrapeFailureLogger.Log("err", scrapeErr) + sl.scrapeFailureLogger.Error("err", scrapeErr) } sl.scrapeFailureLoggerMtx.RUnlock() if errc != nil { @@ -1423,13 +1438,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if appErr != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Debug(sl.l).Log("msg", "Append failed", "err", appErr) + sl.l.Debug("Append failed", "err", appErr) // The append failed, probably due to a parse error or sample limit. // Call sl.append again with an empty scrape to trigger stale markers. if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } } @@ -1502,16 +1517,16 @@ func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, int } err = app.Commit() if err != nil { - level.Warn(sl.l).Log("msg", "Stale commit failed", "err", err) + sl.l.Warn("Stale commit failed", "err", err) } }() if _, _, _, err = sl.append(app, []byte{}, "", staleTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Stale append failed", "err", err) + sl.l.Warn("Stale append failed", "err", err) } if err = sl.reportStale(app, staleTime); err != nil { - level.Warn(sl.l).Log("msg", "Stale report failed", "err", err) + sl.l.Warn("Stale report failed", "err", err) } } @@ -1538,11 +1553,24 @@ type appendErrors struct { } func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { - p, err := textparse.New(b, contentType, sl.scrapeClassicHistograms, sl.symbolTable) - if err != nil { - level.Debug(sl.l).Log( - "msg", "Invalid content type on scrape, using prometheus parser as fallback.", + p, err := textparse.New(b, contentType, sl.fallbackScrapeProtocol, sl.alwaysScrapeClassicHist, sl.enableCTZeroIngestion, sl.symbolTable) + if p == nil { + sl.l.Error( + "Failed to determine correct type of scrape target.", "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + return + } + if sl.convertClassicHistToNHCB { + p = textparse.NewNHCBParser(p, sl.symbolTable, sl.alwaysScrapeClassicHist) + } + if err != nil { + sl.l.Debug( + "Invalid content type on scrape, using fallback setting.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, "err", err, ) } @@ -1558,7 +1586,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, 
metadataChanged bool ) - exemplars := make([]exemplar.Exemplar, 1) + exemplars := make([]exemplar.Exemplar, 0, 1) // updateMetadata updates the current iteration's metadata object and the // metadataChanged value if we have metadata in the scrape cache AND the @@ -1700,11 +1728,19 @@ loop: } else { if sl.enableCTZeroIngestion { if ctMs := p.CreatedTimestamp(); ctMs != nil { - ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if isHistogram && sl.enableNativeHistogramIngestion { + if h != nil { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, h, nil) + } else { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, nil, fh) + } + } else { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + } if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. // CT is an experimental feature. For now, we don't need to fail the // scrape on errors updating the created timestamp, log debug. - level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + sl.l.Debug("Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) } } } @@ -1729,7 +1765,7 @@ loop: sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { - level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err) + sl.l.Debug("Unexpected error", "series", string(met), "err", err) } break loop } @@ -1781,21 +1817,21 @@ loop: outOfOrderExemplars++ default: // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. - level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) + sl.l.Debug("Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) } } if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { // Only report out of order exemplars if all are out of order, otherwise this was a partial update // to some existing set of exemplars. appErrs.numExemplarOutOfOrder += outOfOrderExemplars - level.Debug(sl.l).Log("msg", "Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) } if sl.appendMetadataToWAL && metadataChanged { if _, merr := app.UpdateMetadata(ref, lset, meta); merr != nil { // No need to fail the scrape on errors appending metadata. 
- level.Debug(sl.l).Log("msg", "Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) + sl.l.Debug("Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) } } } @@ -1814,21 +1850,23 @@ loop: sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc() } if appErrs.numOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) } if appErrs.numDuplicates > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) + sl.l.Warn("Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) } if appErrs.numOutOfBounds > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) + sl.l.Warn("Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) } if appErrs.numExemplarOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + app.SetOptions(nil) switch { case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): // Do not count these in logging, as this is expected if a target @@ -1851,17 +1889,17 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke return false, storage.ErrNotFound case errors.Is(err, storage.ErrOutOfOrderSample): appErrs.numOutOfOrder++ - level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met)) + sl.l.Debug("Out of order sample", "series", string(met)) sl.metrics.targetScrapeSampleOutOfOrder.Inc() return false, nil case errors.Is(err, storage.ErrDuplicateSampleForTimestamp): appErrs.numDuplicates++ - level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met)) + sl.l.Debug("Duplicate sample for timestamp", "series", string(met)) sl.metrics.targetScrapeSampleDuplicate.Inc() return false, nil case errors.Is(err, storage.ErrOutOfBounds): appErrs.numOutOfBounds++ - level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) + sl.l.Debug("Out of bounds metric", "series", string(met)) sl.metrics.targetScrapeSampleOutOfBounds.Inc() return false, nil case errors.Is(err, errSampleLimit): @@ -1934,7 +1972,7 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { ts := timestamp.FromTime(start) - + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) stale := math.Float64frombits(value.StaleNaN) b := labels.NewBuilder(labels.EmptyLabels()) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 7725bde5e1..f75e1db89a 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -29,16 +29,18 @@ import ( 
"strings" "sync" "testing" + "text/template" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" + "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/pool" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" @@ -83,6 +86,97 @@ func TestNewScrapePool(t *testing.T) { require.NotNil(t, sp.newLoop, "newLoop function not initialized.") } +func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + // Test with default OutOfOrderTimeWindow (0) + t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + runScrapeLoopTest(t, s, false) + }) + + // Test with specific OutOfOrderTimeWindow (600000) + t.Run("Out-Of-Order Sample Enabled", func(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + runScrapeLoopTest(t, s, true) + }) +} + +func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + // Create an appender for adding samples to the storage. + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + // Current time for generating timestamps. + now := time.Now() + + // Calculate timestamps for the samples based on the current time. + now = now.Truncate(time.Minute) // round down the now timestamp to the nearest minute + timestampInorder1 := now + timestampOutOfOrder := now.Add(-5 * time.Minute) + timestampInorder2 := now.Add(5 * time.Minute) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", timestampInorder1) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 2`), "", timestampOutOfOrder) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 3`), "", timestampInorder2) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + // Query the samples back from the storage. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + // Use a matcher to filter the metric name. 
+ series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + + // Define the expected results + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder1), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder2), + f: 3, + }, + } + + if expectOutOfOrder { + require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + } else { + require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + } +} + func TestDroppedTargetsList(t *testing.T) { var ( app = &nopAppendable{} @@ -158,7 +252,7 @@ type testLoop struct { timeout time.Duration } -func (l *testLoop) setScrapeFailureLogger(log.Logger) { +func (l *testLoop) setScrapeFailureLogger(*logging.JSONFileLogger) { } func (l *testLoop) run(errc chan<- error) { @@ -395,7 +489,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { activeTargets: map[uint64]*Target{}, loops: map[uint64]loop{}, newLoop: newLoop, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), client: http.DefaultClient, metrics: newTestScrapeMetrics(t), symbolTable: labels.NewSymbolTable(), @@ -440,7 +534,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { lerr := l.(*testLoop).getForcedError() if shouldErr { require.Error(t, lerr, "error was expected for %d targets with a limit of %d", targets, limit) - require.Equal(t, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit), lerr.Error()) + require.EqualError(t, lerr, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit)) } else { require.NoError(t, lerr) } @@ -683,11 +777,13 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), false, model.LegacyValidation, + "text/plain", ) } @@ -826,11 +922,13 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) // The loop must terminate during the initial offset if the context @@ -971,11 +1069,13 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) defer cancel() @@ -1148,6 +1248,87 @@ func BenchmarkScrapeLoopAppendOM(b *testing.B) { } } +func TestSetOptionsHandlingStaleness(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + signal := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Function to run the scrape loop + runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { + var ( + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { + return s.Appender(ctx) + } + ) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + numScrapes := 0 + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == cue { + action(sl) + } + 
w.Write([]byte(fmt.Sprintf("metric_a{a=\"1\",b=\"1\"} %d\n", 42+numScrapes))) + return nil + } + sl.run(nil) + } + go func() { + runScrapeLoop(ctx, t, 2, func(sl *scrapeLoop) { + go sl.stop() + // Wait a bit then start a new target. + time.Sleep(100 * time.Millisecond) + go func() { + runScrapeLoop(ctx, t, 4, func(_ *scrapeLoop) { + cancel() + }) + signal <- struct{}{} + }() + }) + }() + + select { + case <-signal: + case <-time.After(10 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + ctx1, cancel := context.WithCancel(context.Background()) + defer cancel() + + q, err := s.Querier(0, time.Now().UnixNano()) + + require.NoError(t, err) + defer q.Close() + + series := q.Select(ctx1, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + var c int + for _, s := range results { + if value.IsStaleNaN(s.f) { + c++ + } + } + require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { appender := &collectResultAppender{} var ( @@ -1524,7 +1705,8 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { fakeRef := storage.SeriesRef(1) expValue := float64(1) metric := []byte(`metric{n="1"} 1`) - p, warning := textparse.New(metric, "", false, labels.NewSymbolTable()) + p, warning := textparse.New(metric, "text/plain", "", false, false, labels.NewSymbolTable()) + require.NotNil(t, p) require.NoError(t, warning) var lset labels.Labels @@ -1844,7 +2026,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { title string - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool enableNativeHistogramsIngestion bool scrapeText string contentType string @@ -1998,7 +2180,8 @@ metric: < `, contentType: "application/vnd.google.protobuf", histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2112,7 +2295,7 @@ metric: < > `, - scrapeClassicHistograms: true, + alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", floats: []floatSample{ {metric: labels.FromStrings("__name__", "test_histogram_count"), t: 1234568, f: 175}, @@ -2124,7 +2307,8 @@ metric: < {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: 1234568, f: 175}, }, histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2173,7 +2357,7 @@ metric: < sl.reportSampleMutator = func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, discoveryLabels) } - sl.scrapeClassicHistograms = test.scrapeClassicHistograms + sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist now := time.Now() @@ -2380,8 +2564,11 @@ func TestTargetScraperScrapeOK(t *testing.T) { expectedTimeout = "1.5" ) - var protobufParsing bool - var allowUTF8 bool + var ( + protobufParsing bool + allowUTF8 bool + qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`) + ) server := httptest.NewServer( http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { @@ -2394,6 +2581,17 @@ func TestTargetScraperScrapeOK(t *testing.T) { "Expected Accept header to prefer application/vnd.google.protobuf.") } + contentTypes := strings.Split(accept, ",") + for _, ct := range contentTypes { + match := qValuePattern.FindStringSubmatch(ct) + require.Len(t, match, 3) + qValue, err := strconv.ParseFloat(match[1], 64) + require.NoError(t, err, "Error parsing q value") + require.GreaterOrEqual(t, qValue, float64(0)) + require.LessOrEqual(t, qValue, float64(1)) + require.LessOrEqual(t, len(strings.Split(match[1], ".")[1]), 3, "q value should have at most 3 decimal places") + } + timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") @@ -2532,7 +2730,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) { resp, err := ts.scrape(context.Background()) require.NoError(t, err) _, err = ts.readResponse(context.Background(), resp, io.Discard) - require.Contains(t, err.Error(), "404", "Expected \"404 NotFound\" error but got: %s", err) + require.ErrorContains(t, err, "404", "Expected \"404 NotFound\" error but got: %s", err) } func TestTargetScraperBodySizeLimit(t *testing.T) { @@ -3044,7 +3242,7 @@ func TestReuseCacheRace(t *testing.T) { func TestCheckAddError(t *testing.T) { var appErrs appendErrors - sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)} + sl := scrapeLoop{l: promslog.NewNopLogger(), metrics: newTestScrapeMetrics(t)} sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) } @@ -3115,18 +3313,7 @@ func TestScrapeReportLimit(t *testing.T) { ScrapeTimeout: model.Duration(100 * time.Millisecond), } - var ( - scrapes int - scrapedTwice = make(chan bool) - ) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, "metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") - scrapes++ - if scrapes == 2 { - close(scrapedTwice) - } - })) + ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3169,6 +3356,52 @@ func TestScrapeReportLimit(t *testing.T) { require.True(t, found) } +func TestScrapeUTF8(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + model.NameValidationScheme = model.UTF8Validation + t.Cleanup(func() { model.NameValidationScheme = model.LegacyValidation }) + + cfg := &config.ScrapeConfig{ + JobName: "test", + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + MetricNameValidationScheme: config.UTF8ValidationConfig, + } + ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") + defer ts.Close() + + sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped twice") + case <-scrapedTwice: + // If the target has been scraped twice, report samples from the first + // scrape have been inserted in the database. 
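Stepping back to the Accept-header assertions added above: they boil down to the rule that every media type in the negotiated `Accept` header must carry a quality value in [0, 1] with at most three decimal places. A standalone sketch of that check under the same regexp approach as the test (the test itself uses `github.com/grafana/regexp`, a drop-in replacement for the stdlib package used here):

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

var qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`)

// checkQValues validates that each media type in an Accept header carries
// a q value in [0, 1] with at most three decimal places.
func checkQValues(accept string) error {
	for _, ct := range strings.Split(accept, ",") {
		m := qValuePattern.FindStringSubmatch(ct)
		if m == nil {
			return fmt.Errorf("no q value in %q", ct)
		}
		q, err := strconv.ParseFloat(m[1], 64)
		if err != nil || q < 0 || q > 1 {
			return fmt.Errorf("invalid q value %q", m[1])
		}
		if parts := strings.Split(m[1], "."); len(parts) == 2 && len(parts[1]) > 3 {
			return fmt.Errorf("q value %q has more than 3 decimal places", m[1])
		}
	}
	return nil
}

func main() {
	accept := "application/vnd.google.protobuf;q=0.75,text/plain;version=0.0.4;q=0.5,*/*;q=0.1"
	fmt.Println(checkQValues(accept)) // <nil>
}
```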
+ } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "with.dots")) + + require.True(t, series.Next(), "series not found in tsdb") +} + func TestScrapeLoopLabelLimit(t *testing.T) { tests := []struct { title string @@ -3365,16 +3598,7 @@ test_summary_count 199 // The expected "quantile" values do not have the trailing ".0". expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} - scrapeCount := 0 - scraped := make(chan bool) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, metricsText) - scrapeCount++ - if scrapeCount > 2 { - close(scraped) - } - })) + ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3393,7 +3617,7 @@ test_summary_count 199 select { case <-time.After(5 * time.Second): t.Fatalf("target was not scraped") - case <-scraped: + case <-scrapedTwice: } ctx, cancel := context.WithCancel(context.Background()) @@ -3425,6 +3649,524 @@ test_summary_count 199 checkValues("quantile", expectedQuantileValues, series) } +// Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. +func TestConvertClassicHistogramsToNHCB(t *testing.T) { + genTestCounterText := func(name string, value int, withMetadata bool) string { + if withMetadata { + return fmt.Sprintf(` +# HELP %s some help text +# TYPE %s counter +%s{address="0.0.0.0",port="5001"} %d +`, name, name, name, value) + } + return fmt.Sprintf(` +%s %d +`, name, value) + } + genTestHistText := func(name string, withMetadata bool) string { + data := map[string]interface{}{ + "name": name, + } + b := &bytes.Buffer{} + if withMetadata { + template.Must(template.New("").Parse(` +# HELP {{.name}} This is a histogram with default buckets +# TYPE {{.name}} histogram +`)).Execute(b, data) + } + template.Must(template.New("").Parse(` +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="10"} 1 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="+Inf"} 1 +{{.name}}_sum{address="0.0.0.0",port="5001"} 10 +{{.name}}_count{address="0.0.0.0",port="5001"} 1 +`)).Execute(b, data) + return b.String() + } + genTestCounterProto := func(name string, value int) string { + return fmt.Sprintf(` +name: "%s" +help: "some help text" +type: COUNTER +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + counter: < + value: %d + > +> +`, name, value) + } + genTestHistProto := func(name string, hasClassic, hasExponential bool) string { + var classic string + if hasClassic { + classic = ` +bucket: < + cumulative_count: 0 + upper_bound: 0.005 +> 
+bucket: < + cumulative_count: 0 + upper_bound: 0.01 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.025 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.05 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.1 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.25 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 1 +> +bucket: < + cumulative_count: 0 + upper_bound: 2.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 5 +> +bucket: < + cumulative_count: 1 + upper_bound: 10 +>` + } + var expo string + if hasExponential { + expo = ` +schema: 3 +zero_threshold: 2.938735877055719e-39 +zero_count: 0 +positive_span: < + offset: 2 + length: 1 +> +positive_delta: 1` + } + return fmt.Sprintf(` +name: "%s" +help: "This is a histogram with default buckets" +type: HISTOGRAM +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + histogram: < + sample_count: 1 + sample_sum: 10 + %s + %s + > + timestamp_ms: 1234568 +> +`, name, classic, expo) + } + + metricsTexts := map[string]struct { + text []string + contentType string + hasClassic bool + hasExponential bool + }{ + "text": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + genTestHistText("test_histogram_3", true), + }, + hasClassic: true, + }, + "text, in different order": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestHistText("test_histogram_3", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + }, + hasClassic: true, + }, + "protobuf": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + 
genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, in different order": { + text: []string{ + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, with additional native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, true), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + hasExponential: true, + }, + "protobuf, with only native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", false, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", false, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", false, true), + }, + contentType: "application/vnd.google.protobuf", + hasExponential: true, + }, + } + + checkBucketValues := func(expectedCount int, series storage.SeriesSet) { + labelName := "le" + var expectedValues []string + if expectedCount > 0 { + expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} + } + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "unexpected number of label values, expected %v but found %v", expectedValues, foundLeValues) + for _, v := range expectedValues { + require.Contains(t, 
foundLeValues, v, "label value not found") + } + } + + // Checks that the expected series is present and runs a basic sanity check of the float values. + checkFloatSeries := func(series storage.SeriesSet, expectedCount int, expectedFloat float64) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + _, f := i.At() + require.Equal(t, expectedFloat, f) + case chunkenc.ValHistogram: + panic("unexpected value type: histogram") + case chunkenc.ValFloatHistogram: + panic("unexpected value type: float histogram") + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of float series not as expected") + } + + // Checks that the expected series is present and runs a basic sanity check of the histogram values. + checkHistSeries := func(series storage.SeriesSet, expectedCount int, expectedSchema int32) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + panic("unexpected value type: float") + case chunkenc.ValHistogram: + _, h := i.AtHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + case chunkenc.ValFloatHistogram: + _, h := i.AtFloatHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of histogram series not as expected") + } + + for metricsTextName, metricsText := range metricsTexts { + for name, tc := range map[string]struct { + alwaysScrapeClassicHistograms bool + convertClassicHistToNHCB bool + }{ + "convert with scrape": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: true, + }, + "convert without scrape": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: true, + }, + "scrape without convert": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: false, + }, + "neither scrape nor convert": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: false, + }, + } { + var expectedClassicHistCount, expectedNativeHistCount int + var expectCustomBuckets bool + if metricsText.hasExponential { + expectedNativeHistCount = 1 + expectCustomBuckets = false + expectedClassicHistCount = 0 + if metricsText.hasClassic && tc.alwaysScrapeClassicHistograms { + expectedClassicHistCount = 1 + } + } else if metricsText.hasClassic { + switch { + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 0 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 0 + } + } + + t.Run(fmt.Sprintf("%s with %s", name, metricsTextName), func(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(50 * time.Millisecond), + ScrapeTimeout: model.Duration(25 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + 
ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, + } + + scrapeCount := 0 + scraped := make(chan bool) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if metricsText.contentType != "" { + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + for _, text := range metricsText.text { + buf := &bytes.Buffer{} + // In case of protobuf, we have to create the binary representation. + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(text, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) + buf.Write(varintBuf) + buf.Write(protoBuf) + w.Write(buf.Bytes()) + } + } else { + for _, text := range metricsText.text { + fmt.Fprint(w, text) + } + } + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + var series storage.SeriesSet + + for i := 1; i <= 3; i++ { + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_count", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_sum", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_bucket", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_count", i))) + checkFloatSeries(series, expectedClassicHistCount, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_sum", i))) + checkFloatSeries(series, expectedClassicHistCount, 10.) 
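Before the remaining assertions, a note on what the converted series looks like. When `ConvertClassicHistogramsToNHCB` is set, the classic `le` buckets of the fixture become a single native histogram with custom buckets. A hedged sketch of roughly the value this produces in the data model; the field values are hand-derived from the one-observation fixture, not emitted by the converter itself:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/histogram"
)

func main() {
	nhcb := &histogram.Histogram{
		Schema: histogram.CustomBucketsSchema,
		Count:  1,
		Sum:    10,
		// Classic le bounds become custom bucket boundaries; +Inf is implicit.
		CustomValues: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
		// The single observation sits in the (5, 10] bucket, i.e. bucket index 10.
		PositiveSpans:   []histogram.Span{{Offset: 10, Length: 1}},
		PositiveBuckets: []int64{1}, // delta-encoded; the first entry is an absolute count
	}
	fmt.Println(nhcb.String())
}
```

This is why the test checks `histogram.CustomBucketsSchema` for the converted cases and schema 3 when a real exponential histogram was scraped.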
+ + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_bucket", i))) + checkBucketValues(expectedClassicHistCount, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d", i))) + + var expectedSchema int32 + if expectCustomBuckets { + expectedSchema = histogram.CustomBucketsSchema + } else { + expectedSchema = 3 + } + checkHistSeries(series, expectedNativeHistCount, expectedSchema) + } + }) + } + } +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { appender := &collectResultAppender{} var ( @@ -3462,7 +4204,6 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * case <-time.After(5 * time.Second): t.Fatalf("Scrape wasn't stopped.") } - // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for // each scrape successful or not. require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) @@ -3730,7 +4471,9 @@ func TestNativeHistogramMaxSchemaSet(t *testing.T) { }, } for name, tc := range testcases { + tc := tc t.Run(name, func(t *testing.T) { + t.Parallel() testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) }) } @@ -3773,8 +4516,8 @@ func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expec configStr := fmt.Sprintf(` global: metric_name_validation_scheme: legacy - scrape_interval: 1s - scrape_timeout: 1s + scrape_interval: 50ms + scrape_timeout: 25ms scrape_configs: - job_name: test %s @@ -3787,9 +4530,9 @@ scrape_configs: s.DB.EnableNativeHistograms() reg := prometheus.NewRegistry() - mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) + mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) require.NoError(t, err) - cfg, err := config.Load(configStr, false, log.NewNopLogger()) + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) mng.ApplyConfig(cfg) tsets := make(chan map[string][]*targetgroup.Group) @@ -3818,7 +4561,7 @@ scrape_configs: countSeries++ } return countSeries > 0 - }, 15*time.Second, 100*time.Millisecond) + }, 5*time.Second, 100*time.Millisecond) // Check that native histogram schema is as expected. q, err := s.Querier(0, math.MaxInt64) @@ -4001,3 +4744,16 @@ func TestTargetScrapeConfigWithLabels(t *testing.T) { }) } } + +func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice chan bool) { + var scrapes int + scrapedTwice = make(chan bool) + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, scrapeText) + scrapes++ + if scrapes == 2 { + close(scrapedTwice) + } + })), scrapedTwice +} diff --git a/scrape/target.go b/scrape/target.go index 3754398338..06d4737ff9 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -17,7 +17,6 @@ import ( "errors" "fmt" "hash/fnv" - "net" "net/url" "strings" "sync" @@ -424,7 +423,7 @@ func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels // PopulateLabels builds a label set from the given label set and scrape configuration. // It returns a label set before relabeling was applied as the second return value. // Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. 
-func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort bool) (res, orig labels.Labels, err error) { +func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig) (res, orig labels.Labels, err error) { // Copy labels into the labelset for the target if they are not set already. scrapeLabels := []labels.Label{ {Name: model.JobLabel, Value: cfg.JobName}, @@ -457,51 +456,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("no address") } - // addPort checks whether we should add a default port to the address. - // If the address is not valid, we don't append a port either. - addPort := func(s string) (string, string, bool) { - // If we can split, a port exists and we don't have to add one. - if host, port, err := net.SplitHostPort(s); err == nil { - return host, port, false - } - // If adding a port makes it valid, the previous error - // was not due to an invalid address and we can append a port. - _, _, err := net.SplitHostPort(s + ":1234") - return "", "", err == nil - } - addr := lb.Get(model.AddressLabel) - scheme := lb.Get(model.SchemeLabel) - host, port, add := addPort(addr) - // If it's an address with no trailing port, infer it based on the used scheme - // unless the no-default-scrape-port feature flag is present. - if !noDefaultPort && add { - // Addresses reaching this point are already wrapped in [] if necessary. - switch scheme { - case "http", "": - addr += ":80" - case "https": - addr += ":443" - default: - return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("invalid scheme: %q", cfg.Scheme) - } - lb.Set(model.AddressLabel, addr) - } - - if noDefaultPort { - // If it's an address with a trailing default port and the - // no-default-scrape-port flag is present, remove the port. - switch port { - case "80": - if scheme == "http" { - lb.Set(model.AddressLabel, host) - } - case "443": - if scheme == "https" { - lb.Set(model.AddressLabel, host) - } - } - } if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { return labels.EmptyLabels(), labels.EmptyLabels(), err @@ -557,7 +512,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort } // TargetsFromGroup builds targets based on the given TargetGroup and config. 
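Context for the removal above: `PopulateLabels` no longer infers `:80`/`:443` from the scheme (the `no-default-scrape-port` feature-flag era ends), so the `__address__` label is now passed to `config.CheckTargetAddress` exactly as discovered. A tiny sketch of the `net.SplitHostPort` probe the deleted `addPort` helper was built around, to make the behavior change concrete:

```go
package main

import (
	"fmt"
	"net"
)

// hasPort reports whether addr already carries an explicit port, using the
// same net.SplitHostPort probe the removed addPort helper relied on.
func hasPort(addr string) bool {
	_, _, err := net.SplitHostPort(addr)
	return err == nil
}

func main() {
	fmt.Println(hasPort("localhost:9090"))     // true
	fmt.Println(hasPort("localhost"))          // false: targets must now spell out the port
	fmt.Println(hasPort("[2001:db8::1]:9090")) // true; IPv6 hosts need the brackets
}
```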
-func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefaultPort bool, targets []*Target, lb *labels.Builder) ([]*Target, []error) { +func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, targets []*Target, lb *labels.Builder) ([]*Target, []error) { targets = targets[:0] failures := []error{} @@ -573,7 +528,7 @@ func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefault } } - lset, origLabels, err := PopulateLabels(lb, cfg, noDefaultPort) + lset, origLabels, err := PopulateLabels(lb, cfg) if err != nil { failures = append(failures, fmt.Errorf("instance %d in group %s: %w", i, tg, err)) } diff --git a/scrape/target_test.go b/scrape/target_test.go index 84fe078b2b..bd27952874 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -348,7 +348,7 @@ func TestTargetsFromGroup(t *testing.T) { ScrapeInterval: model.Duration(1 * time.Minute), } lb := labels.NewBuilder(labels.EmptyLabels()) - targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, false, nil, lb) + targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, nil, lb) require.Len(t, targets, 1) require.Len(t, failures, 1) require.EqualError(t, failures[0], expectedError) @@ -435,7 +435,7 @@ scrape_configs: lb := labels.NewBuilder(labels.EmptyLabels()) group := &targetgroup.Group{Targets: targets} for i := 0; i < b.N; i++ { - tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], false, tgets, lb) + tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], tgets, lb) if len(targets) != nTargets { b.Fatalf("Expected %d targets, got %d", nTargets, len(targets)) } diff --git a/scripts/compress_assets.sh b/scripts/compress_assets.sh index 6608677bbf..19e1e22486 100755 --- a/scripts/compress_assets.sh +++ b/scripts/compress_assets.sh @@ -4,6 +4,12 @@ set -euo pipefail +export STATIC_DIR=static +PREBUILT_ASSETS_STATIC_DIR=${PREBUILT_ASSETS_STATIC_DIR:-} +if [ -n "$PREBUILT_ASSETS_STATIC_DIR" ]; then + STATIC_DIR=$(realpath $PREBUILT_ASSETS_STATIC_DIR) +fi + cd web/ui cp embed.go.tmpl embed.go @@ -11,6 +17,19 @@ GZIP_OPTS="-fk" # gzip option '-k' may not always exist in the latest gzip available on different distros. if ! gzip -k -h &>/dev/null; then GZIP_OPTS="-f"; fi +mkdir -p static find static -type f -name '*.gz' -delete -find static -type f -exec gzip $GZIP_OPTS '{}' \; -print0 | xargs -0 -I % echo %.gz | sort | xargs echo //go:embed >> embed.go + +# Compress files from the prebuilt static directory and replicate the structure in the current static directory +find "${STATIC_DIR}" -type f ! 
-name '*.gz' -exec bash -c ' + for file; do + dest="${file#${STATIC_DIR}}" + mkdir -p "static/$(dirname "$dest")" + gzip '"$GZIP_OPTS"' "$file" -c > "static/${dest}.gz" + done +' bash {} + + +# Append the paths of gzipped files to embed.go +find static -type f -name '*.gz' -print0 | sort -z | xargs -0 echo //go:embed >> embed.go + echo var EmbedFS embed.FS >> embed.go diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index a15cfc97f0..1c099932ba 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: diff --git a/storage/buffer.go b/storage/buffer.go index 651e5c83e8..e847c10e61 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -187,6 +187,10 @@ func (s fSample) Type() chunkenc.ValueType { return chunkenc.ValFloat } +func (s fSample) Copy() chunks.Sample { + return s +} + type hSample struct { t int64 h *histogram.Histogram @@ -212,6 +216,10 @@ func (s hSample) Type() chunkenc.ValueType { return chunkenc.ValHistogram } +func (s hSample) Copy() chunks.Sample { + return hSample{t: s.t, h: s.h.Copy()} +} + type fhSample struct { t int64 fh *histogram.FloatHistogram @@ -237,13 +245,17 @@ func (s fhSample) Type() chunkenc.ValueType { return chunkenc.ValFloatHistogram } +func (s fhSample) Copy() chunks.Sample { + return fhSample{t: s.t, fh: s.fh.Copy()} +} + type sampleRing struct { delta int64 // Lookback buffers. We use iBuf for mixed samples, but one of the three - // concrete ones for homogenous samples. (Only one of the four bufs is + // concrete ones for homogeneous samples. (Only one of the four bufs is // allowed to be populated!) This avoids the overhead of the interface - // wrapper for the happy (and by far most common) case of homogenous + // wrapper for the happy (and by far most common) case of homogeneous // samples. iBuf []chunks.Sample fBuf []fSample @@ -268,7 +280,7 @@ const ( fhBuf ) -// newSampleRing creates a new sampleRing. If you do not know the prefereed +// newSampleRing creates a new sampleRing. If you do not know the preferred // value type yet, use a size of 0 (in which case the provided typ doesn't // matter). On the first add, a buffer of size 16 will be allocated with the // preferred type being the type of the first added sample. @@ -535,55 +547,8 @@ func (r *sampleRing) addFH(s fhSample) { } } -// genericAdd is a generic implementation of adding a chunks.Sample -// implementation to a buffer of a sample ring. However, the Go compiler -// currently (go1.20) decides to not expand the code during compile time, but -// creates dynamic code to handle the different types. That has a significant -// overhead during runtime, noticeable in PromQL benchmarks. For example, the -// "RangeQuery/expr=rate(a_hundred[1d]),steps=.*" benchmarks show about 7% -// longer runtime, 9% higher allocation size, and 10% more allocations. -// Therefore, genericAdd has been manually implemented for all the types -// (addSample, addF, addH, addFH) below. -// -// func genericAdd[T chunks.Sample](s T, buf []T, r *sampleRing) []T { -// l := len(buf) -// // Grow the ring buffer if it fits no more elements. 
-// if l == 0 { -// buf = make([]T, 16) -// l = 16 -// } -// if l == r.l { -// newBuf := make([]T, 2*l) -// copy(newBuf[l+r.f:], buf[r.f:]) -// copy(newBuf, buf[:r.f]) -// -// buf = newBuf -// r.i = r.f -// r.f += l -// l = 2 * l -// } else { -// r.i++ -// if r.i >= l { -// r.i -= l -// } -// } -// -// buf[r.i] = s -// r.l++ -// -// // Free head of the buffer of samples that just fell out of the range. -// tmin := s.T() - r.delta -// for buf[r.f].T() < tmin { -// r.f++ -// if r.f >= l { -// r.f -= l -// } -// r.l-- -// } -// return buf -// } - -// addSample is a handcoded specialization of genericAdd (see above). +// addSample adds a sample to a buffer of chunks.Sample, i.e. the general case +// using an interface as the type. func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -607,7 +572,7 @@ func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sam } } - buf[r.i] = s + buf[r.i] = s.Copy() r.l++ // Free head of the buffer of samples that just fell out of the range. @@ -622,7 +587,7 @@ func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sam return buf } -// addF is a handcoded specialization of genericAdd (see above). +// addF adds an fSample to a (specialized) fSample buffer. func addF(s fSample, buf []fSample, r *sampleRing) []fSample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -661,7 +626,7 @@ func addF(s fSample, buf []fSample, r *sampleRing) []fSample { return buf } -// addH is a handcoded specialization of genericAdd (see above). +// addH adds an hSample to a (specialized) hSample buffer. func addH(s hSample, buf []hSample, r *sampleRing) []hSample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -705,7 +670,7 @@ func addH(s hSample, buf []hSample, r *sampleRing) []hSample { return buf } -// addFH is a handcoded specialization of genericAdd (see above). +// addFH adds an fhSample to a (specialized) fhSample buffer. func addFH(s fhSample, buf []fhSample, r *sampleRing) []fhSample { l := len(buf) // Grow the ring buffer if it fits no more elements. 
diff --git a/storage/buffer_test.go b/storage/buffer_test.go index b5c6443ac5..6e8e83db8f 100644 --- a/storage/buffer_test.go +++ b/storage/buffer_test.go @@ -314,6 +314,56 @@ func TestBufferedSeriesIteratorMixedHistograms(t *testing.T) { require.Equal(t, histograms[1].ToFloat(nil), fh) } +func TestBufferedSeriesIteratorMixedFloatsAndHistograms(t *testing.T) { + histograms := tsdbutil.GenerateTestHistograms(5) + + it := NewBufferIterator(NewListSeriesIteratorWithCopy(samples{ + hSample{t: 1, h: histograms[0].Copy()}, + fSample{t: 2, f: 2}, + hSample{t: 3, h: histograms[1].Copy()}, + hSample{t: 4, h: histograms[2].Copy()}, + fhSample{t: 3, fh: histograms[3].ToFloat(nil)}, + fhSample{t: 4, fh: histograms[4].ToFloat(nil)}, + }), 6) + + require.Equal(t, chunkenc.ValNone, it.Seek(7)) + require.NoError(t, it.Err()) + + buf := it.Buffer() + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h0 := buf.AtHistogram() + require.Equal(t, histograms[0], h0) + + require.Equal(t, chunkenc.ValFloat, buf.Next()) + _, v := buf.At() + require.Equal(t, 2.0, v) + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h1 := buf.AtHistogram() + require.Equal(t, histograms[1], h1) + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h2 := buf.AtHistogram() + require.Equal(t, histograms[2], h2) + + require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) + _, h3 := buf.AtFloatHistogram(nil) + require.Equal(t, histograms[3].ToFloat(nil), h3) + + require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) + _, h4 := buf.AtFloatHistogram(nil) + require.Equal(t, histograms[4].ToFloat(nil), h4) + + // Test for overwrite bug where the buffered histogram was reused + // between items in the buffer. + require.Equal(t, histograms[0], h0) + require.Equal(t, histograms[1], h1) + require.Equal(t, histograms[2], h2) + require.Equal(t, histograms[3].ToFloat(nil), h3) + require.Equal(t, histograms[4].ToFloat(nil), h4) +} + func BenchmarkBufferedSeriesIterator(b *testing.B) { // Simulate a 5 minute rate. it := NewBufferIterator(newFakeSeriesIterator(int64(b.N), 30), 5*60) diff --git a/storage/fanout.go b/storage/fanout.go index e52342bc7e..4d076788a7 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -15,9 +15,8 @@ package storage import ( "context" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -28,7 +27,7 @@ import ( ) type fanout struct { - logger log.Logger + logger *slog.Logger primary Storage secondaries []Storage @@ -43,7 +42,7 @@ type fanout struct { // and the error from the secondary querier will be returned as a warning. // // NOTE: In the case of Prometheus, it treats all remote storages as secondary / best effort. -func NewFanout(logger log.Logger, primary Storage, secondaries ...Storage) Storage { +func NewFanout(logger *slog.Logger, primary Storage, secondaries ...Storage) Storage { return &fanout{ logger: logger, primary: primary, @@ -142,12 +141,22 @@ func (f *fanout) Close() error { // fanoutAppender implements Appender. type fanoutAppender struct { - logger log.Logger + logger *slog.Logger primary Appender secondaries []Appender } +// SetOptions propagates the hints to both primary and secondary appenders. 
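The `SetOptions` hook being wired through the fanout appender here is the same one the scrape loop uses above, so staleness markers are discarded rather than written as out-of-order samples when an OOO window is configured. A hedged usage sketch; `appendStaleMarker` is a hypothetical helper mirroring the scrape loop's `forEachStale` path, not an API of the package:

```go
package example

import (
	"math"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/value"
	"github.com/prometheus/prometheus/storage"
)

// appendStaleMarker writes a staleness marker while refusing to create
// out-of-order samples, even when the TSDB allows them.
func appendStaleMarker(app storage.Appender, lset labels.Labels, ts int64) error {
	app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})
	defer app.SetOptions(nil) // restore default behavior for subsequent appends

	_, err := app.Append(0, lset, ts, math.Float64frombits(value.StaleNaN))
	return err
}
```

Propagating the options to every secondary appender keeps the fanout's primary and secondaries consistent, which matters because a marker dropped by one store but accepted by another would make the fanout's view diverge.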
+func (f *fanoutAppender) SetOptions(opts *AppendOptions) { + if f.primary != nil { + f.primary.SetOptions(opts) + } + for _, appender := range f.secondaries { + appender.SetOptions(opts) + } +} + func (f *fanoutAppender) Append(ref SeriesRef, l labels.Labels, t int64, v float64) (SeriesRef, error) { ref, err := f.primary.Append(ref, l, t, v) if err != nil { @@ -190,6 +199,20 @@ func (f *fanoutAppender) AppendHistogram(ref SeriesRef, l labels.Labels, t int64 return ref, nil } +func (f *fanoutAppender) AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) { + ref, err := f.primary.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + if err != nil { + return ref, err + } + + for _, appender := range f.secondaries { + if _, err := appender.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh); err != nil { + return 0, err + } + } + return ref, nil +} + func (f *fanoutAppender) UpdateMetadata(ref SeriesRef, l labels.Labels, m metadata.Metadata) (SeriesRef, error) { ref, err := f.primary.UpdateMetadata(ref, l, m) if err != nil { @@ -226,7 +249,7 @@ func (f *fanoutAppender) Commit() (err error) { err = appender.Commit() } else { if rollbackErr := appender.Rollback(); rollbackErr != nil { - level.Error(f.logger).Log("msg", "Squashed rollback error on commit", "err", rollbackErr) + f.logger.Error("Squashed rollback error on commit", "err", rollbackErr) } } } @@ -242,7 +265,7 @@ func (f *fanoutAppender) Rollback() (err error) { case err == nil: err = rollbackErr case rollbackErr != nil: - level.Error(f.logger).Log("msg", "Squashed rollback error on rollback", "err", rollbackErr) + f.logger.Error("Squashed rollback error on rollback", "err", rollbackErr) } } return nil diff --git a/storage/fanout_test.go b/storage/fanout_test.go index 4613fe7572..3eef9e3cd0 100644 --- a/storage/fanout_test.go +++ b/storage/fanout_test.go @@ -173,16 +173,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], tc.warning.Error()) } }) t.Run("chunks", func(t *testing.T) { @@ -200,16 +197,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], tc.warning.Error()) } }) } diff --git a/storage/interface.go b/storage/interface.go index 2f125e5902..56bb53dfe0 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -43,13 +43,15 @@ var ( ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are 
disabled") + ErrOOONativeHistogramsDisabled = fmt.Errorf("out-of-order native histogram ingestion is disabled") // ErrOutOfOrderCT indicates failed append of CT to the storage // due to CT being older the then newer sample. // NOTE(bwplotka): This can be both an instrumentation failure or commonly expected // behaviour, and we currently don't have a way to determine this. As a result // it's recommended to ignore this error for now. - ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") + ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") + ErrCTNewerThanSample = fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") ) // SeriesRef is a generic series reference. In prometheus it is either a @@ -112,6 +114,8 @@ type Querier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) SeriesSet @@ -150,6 +154,8 @@ type ChunkQuerier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) ChunkSeriesSet @@ -157,7 +163,7 @@ type ChunkQuerier interface { // LabelQuerier provides querying access over labels. type LabelQuerier interface { - // LabelValues returns all potential values for a label name. + // LabelValues returns all potential values for a label name in sorted order. // It is not safe to use the strings beyond the lifetime of the querier. // If matchers are specified the returned result set is reduced // to label values of metrics matching the matchers. @@ -237,6 +243,10 @@ func (f QueryableFunc) Querier(mint, maxt int64) (Querier, error) { return f(mint, maxt) } +type AppendOptions struct { + DiscardOutOfOrder bool +} + // Appender provides batched appends against a storage. // It must be completed with a call to Commit or Rollback and must not be reused afterwards. // @@ -265,6 +275,10 @@ type Appender interface { // Appender has to be discarded after rollback. Rollback() error + // SetOptions configures the appender with specific append options such as + // discarding out-of-order samples even if out-of-order is enabled in the TSDB. + SetOptions(opts *AppendOptions) + ExemplarAppender HistogramAppender MetadataUpdater @@ -312,6 +326,20 @@ type HistogramAppender interface { // pointer. AppendHistogram won't mutate the histogram, but in turn // depends on the caller to not mutate it either. 
AppendHistogram(ref SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) + // AppendHistogramCTZeroSample adds synthetic zero sample for the given ct timestamp, + // which will be associated with given series, labels and the incoming + // sample's t (timestamp). AppendHistogramCTZeroSample returns error if zero sample can't be + // appended, for example when ct is too old, or when it would collide with + // incoming sample (sample has priority). + // + // AppendHistogramCTZeroSample has to be called before the corresponding histogram AppendHistogram. + // A series reference number is returned which can be used to modify the + // CT for the given series in the same or later transactions. + // Returned reference numbers are ephemeral and may be rejected in calls + // to AppendHistogramCTZeroSample() at any point. + // + // If the reference is 0 it must not be used for caching. + AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) } // MetadataUpdater provides an interface for associating metadata to stored series. diff --git a/storage/merge.go b/storage/merge.go index 2424b26ab7..a4d0934b16 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -153,13 +153,18 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints ) // Schedule all Selects for all queriers we know about. for _, querier := range q.queriers { + // copy the matchers as some queriers may alter the slice. + // See https://github.com/prometheus/prometheus/issues/14723 + matchersCopy := make([]*labels.Matcher, len(matchers)) + copy(matchersCopy, matchers) + wg.Add(1) - go func(qr genericQuerier) { + go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. - seriesSetChan <- qr.Select(ctx, true, hints, matchers...) - }(querier) + seriesSetChan <- qr.Select(ctx, true, hints, m...) + }(querier, matchersCopy) } go func() { wg.Wait() diff --git a/storage/remote/azuread/azuread_test.go b/storage/remote/azuread/azuread_test.go index 7c97138120..08870382ec 100644 --- a/storage/remote/azuread/azuread_test.go +++ b/storage/remote/azuread/azuread_test.go @@ -68,7 +68,7 @@ func (ad *AzureAdTestSuite) TestAzureAdRoundTripper() { cases := []struct { cfg *AzureADConfig }{ - // AzureAd roundtripper with Managedidentity. + // AzureAd roundtripper with ManagedIdentity. 
{ cfg: &AzureADConfig{ Cloud: "AzurePublic", diff --git a/storage/remote/chunked_test.go b/storage/remote/chunked_test.go index 7c3993ca62..82ed866345 100644 --- a/storage/remote/chunked_test.go +++ b/storage/remote/chunked_test.go @@ -86,7 +86,7 @@ func TestChunkedReader_Overflow(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(b2), 11, nil).Next() require.Error(t, err, "expect exceed limit error") - require.Equal(t, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes", err.Error()) + require.EqualError(t, err, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes") } func TestChunkedReader_CorruptedFrame(t *testing.T) { @@ -102,5 +102,5 @@ func TestChunkedReader_CorruptedFrame(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(bs), 20, nil).Next() require.Error(t, err, "expected malformed frame") - require.Equal(t, "chunkedReader: corrupted frame; checksum mismatch", err.Error()) + require.EqualError(t, err, "chunkedReader: corrupted frame; checksum mismatch") } diff --git a/storage/remote/client.go b/storage/remote/client.go index 62218cfba9..23775122e5 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "net/http" + "net/http/httptrace" "strconv" "strings" "time" @@ -31,6 +32,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" + "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" @@ -213,8 +215,11 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { if conf.WriteProtoMsg != "" { writeProtoMsg = conf.WriteProtoMsg } - - httpClient.Transport = otelhttp.NewTransport(t) + httpClient.Transport = otelhttp.NewTransport( + t, + otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { + return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) + })) return &Client{ remoteName: name, urlString: conf.URL.String(), diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 404f1add75..c2fe6186ce 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -20,9 +20,9 @@ import ( "sync" "testing" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -253,8 +253,7 @@ func TestValidateLabelsAndMetricName(t *testing.T) { t.Run(test.description, func(t *testing.T) { err := validateLabelsAndMetricName(test.input) if test.expectedErr != "" { - require.Error(t, err) - require.Equal(t, test.expectedErr, err.Error()) + require.EqualError(t, err, test.expectedErr) } else { require.NoError(t, err) } @@ -551,7 +550,7 @@ func TestNegotiateResponseType(t *testing.T) { _, err = NegotiateResponseType([]prompb.ReadRequest_ResponseType{20}) require.Error(t, err, "expected error due to not supported requested response types") - require.Equal(t, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]", err.Error()) + require.EqualError(t, err, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]") } func TestMergeLabels(t *testing.T) { @@ -583,7 +582,7 @@ func TestDecodeWriteRequest(t *testing.T) { } 
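// A note on the otelhttp transport change above, as a sketch: adding a client
// trace via otelhttptrace with the WithoutSubSpans option records low-level
// httptrace events (DNS lookup, connect, TLS handshake, first byte) as events
// on the request's own span rather than as one child span per phase, keeping
// remote-write traces compact:
//
//	httpClient.Transport = otelhttp.NewTransport(
//		t, // the underlying round tripper from the surrounding code
//		otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace {
//			return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans())
//		}))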
func TestDecodeWriteV2Request(t *testing.T) { - buf, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + buf, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) actual, err := DecodeWriteV2Request(bytes.NewReader(buf)) diff --git a/storage/remote/metadata_watcher.go b/storage/remote/metadata_watcher.go index fdcd668f56..9306dcb4c2 100644 --- a/storage/remote/metadata_watcher.go +++ b/storage/remote/metadata_watcher.go @@ -16,11 +16,11 @@ package remote import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/scrape" ) @@ -44,7 +44,7 @@ func (noop *noopScrapeManager) Get() (*scrape.Manager, error) { // MetadataWatcher watches the Scrape Manager for a given WriteMetadataTo. type MetadataWatcher struct { name string - logger log.Logger + logger *slog.Logger managerGetter ReadyScrapeManager manager Watchable @@ -62,9 +62,9 @@ type MetadataWatcher struct { } // NewMetadataWatcher builds a new MetadataWatcher. -func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { +func NewMetadataWatcher(l *slog.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if mg == nil { @@ -87,7 +87,7 @@ func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w Meta // Start the MetadataWatcher. func (mw *MetadataWatcher) Start() { - level.Info(mw.logger).Log("msg", "Starting scraped metadata watcher") + mw.logger.Info("Starting scraped metadata watcher") mw.hardShutdownCtx, mw.hardShutdownCancel = context.WithCancel(context.Background()) mw.softShutdownCtx, mw.softShutdownCancel = context.WithCancel(mw.hardShutdownCtx) go mw.loop() @@ -95,15 +95,15 @@ func (mw *MetadataWatcher) Start() { // Stop the MetadataWatcher. func (mw *MetadataWatcher) Stop() { - level.Info(mw.logger).Log("msg", "Stopping metadata watcher...") - defer level.Info(mw.logger).Log("msg", "Scraped metadata watcher stopped") + mw.logger.Info("Stopping metadata watcher...") + defer mw.logger.Info("Scraped metadata watcher stopped") mw.softShutdownCancel() select { case <-mw.done: return case <-time.After(mw.deadline): - level.Error(mw.logger).Log("msg", "Failed to flush metadata") + mw.logger.Error("Failed to flush metadata") } mw.hardShutdownCancel() diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a112b9bbce..d5de2c7651 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -19,6 +19,8 @@ package prometheus import ( "strings" "unicode" + + "github.com/prometheus/prometheus/util/strutil" ) // Normalizes the specified label to follow Prometheus label names standard. @@ -26,7 +28,6 @@ import ( // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels. // // Labels that start with non-letter rune will be prefixed with "key_". -// // An exception is made for double-underscores which are allowed. 
func NormalizeLabel(label string) string { // Trivial case @@ -34,8 +35,7 @@ func NormalizeLabel(label string) string { return label } - // Replace all non-alphanumeric runes with underscores - label = strings.Map(sanitizeRune, label) + label = strutil.SanitizeLabelName(label) // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { @@ -46,11 +46,3 @@ func NormalizeLabel(label string) string { return label } - -// Return '_' for anything non-alphanumeric. -func sanitizeRune(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return r - } - return '_' -} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go new file mode 100644 index 0000000000..21d4d6a6d8 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -0,0 +1,45 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeLabel(t *testing.T) { + tests := []struct { + label string + expected string + }{ + {"", ""}, + {"label:with:colons", "label_with_colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_"}, + {"label_with_foreign_characters_字符", "label_with_foreign_characters___"}, + {"label.with.dots", "label_with_dots"}, + {"123label", "key_123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + result := NormalizeLabel(test.label) + require.Equal(t, test.expected, result) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0f472b80a0..0119b64dff 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -17,9 +17,12 @@ package prometheus import ( + "regexp" + "slices" "strings" "unicode" + "github.com/prometheus/prometheus/util/strutil" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -84,24 +87,27 @@ var perUnitMap = map[string]string{ // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, // https://prometheus.io/docs/practices/naming/#metric-and-label-naming -// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. +// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { - var metricName string - // Full normalization following standard Prometheus naming conventions if addMetricSuffixes { return normalizeName(metric, namespace) } - // Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars - metricName = RemovePromForbiddenRunes(metric.Name()) + // Regexp for metric name characters that should be replaced with _. + invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName := strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") // Namespace? if namespace != "" { return namespace + "_" + metricName } - // Metric name starts with a digit? Prefix it with an underscore + // Metric name starts with a digit? Prefix it with an underscore. if metricName != "" && unicode.IsDigit(rune(metricName[0])) { metricName = "_" + metricName } @@ -109,12 +115,17 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix return metricName } -// Build a normalized name for the specified metric +// Build a normalized name for the specified metric. func normalizeName(metric pmetric.Metric, namespace string) string { - // Split metric name into "tokens" (remove all non-alphanumerics) + // Regexp for characters that can't be in a metric name token. + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + + // Split metric name into "tokens" (of supported metric name runes). + // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
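// A worked example of the simple (no-suffix) path above, as a sketch:
// BuildCompliantName for a metric named "system.io#rate" splits on every rune
// outside [a-zA-Z0-9:_] and rejoins with "_", yielding "system_io_rate".
// Because FieldsFunc drops empty fields, consecutive invalid runes collapse
// into a single "_", so "a..b" likewise becomes "a_b".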
nameTokens := strings.FieldsFunc( metric.Name(), - func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }, + func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) }, ) // Split unit at the '/' if any @@ -123,11 +134,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string { // Main unit // Append if not blank, doesn't contain '{}', and is not present in metric name already if len(unitTokens) > 0 { + var mainUnitProm, perUnitProm string mainUnitOTel := strings.TrimSpace(unitTokens[0]) if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { - mainUnitProm := CleanUpString(unitMapGetOrDefault(mainUnitOTel)) - if mainUnitProm != "" && !contains(nameTokens, mainUnitProm) { - nameTokens = append(nameTokens, mainUnitProm) + mainUnitProm = cleanUpUnit(unitMapGetOrDefault(mainUnitOTel)) + if slices.Contains(nameTokens, mainUnitProm) { + mainUnitProm = "" } } @@ -136,13 +148,26 @@ func normalizeName(metric pmetric.Metric, namespace string) string { if len(unitTokens) > 1 && unitTokens[1] != "" { perUnitOTel := strings.TrimSpace(unitTokens[1]) if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { - perUnitProm := CleanUpString(perUnitMapGetOrDefault(perUnitOTel)) - if perUnitProm != "" && !contains(nameTokens, perUnitProm) { - nameTokens = append(nameTokens, "per", perUnitProm) + perUnitProm = cleanUpUnit(perUnitMapGetOrDefault(perUnitOTel)) + } + if perUnitProm != "" { + perUnitProm = "per_" + perUnitProm + if slices.Contains(nameTokens, perUnitProm) { + perUnitProm = "" } } } + if perUnitProm != "" { + mainUnitProm = strings.TrimSuffix(mainUnitProm, "_") + } + + if mainUnitProm != "" { + nameTokens = append(nameTokens, mainUnitProm) + } + if perUnitProm != "" { + nameTokens = append(nameTokens, perUnitProm) + } } // Append _total for Counters @@ -235,13 +260,15 @@ func removeSuffix(tokens []string, suffix string) []string { return tokens } -// Clean up specified string so it's Prometheus compliant -func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") -} - -func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") +// cleanUpUnit cleans up unit so it matches model.LabelNameRE. +func cleanUpUnit(unit string) string { + // Multiple consecutive underscores are replaced with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
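// A worked example of the unit handling above, as a sketch: for a counter
// named "system.io" with unit "foo/bar", the main unit token becomes "foo"
// and the per-unit token "per_bar"; appended to the name tokens, with "_total"
// added for counters, the normalized name is "system_io_foo_per_bar_total"
// (matching the test case further below).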
+ multipleUnderscoresRE := regexp.MustCompile(`__+`) + return strings.TrimPrefix(multipleUnderscoresRE.ReplaceAllString( + strutil.SanitizeLabelName(unit), + "_", + ), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit @@ -262,16 +289,6 @@ func perUnitMapGetOrDefault(perUnit string) string { return perUnit } -// Returns whether the slice contains the specified value -func contains(slice []string, value string) bool { - for _, sliceEntry := range slice { - if sliceEntry == value { - return true - } - } - return false -} - // Remove the specified value from the slice func removeItem(slice []string, value string) []string { newSlice := make([]string, 0, len(slice)) diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 07b9b0a784..2d5648e84c 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -148,13 +148,13 @@ func TestNamespace(t *testing.T) { require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space")) } -func TestCleanUpString(t *testing.T) { - require.Equal(t, "", CleanUpString("")) - require.Equal(t, "a_b", CleanUpString("a b")) - require.Equal(t, "hello_world", CleanUpString("hello, world!")) - require.Equal(t, "hello_you_2", CleanUpString("hello you 2")) - require.Equal(t, "1000", CleanUpString("$1000")) - require.Equal(t, "", CleanUpString("*+$^=)")) +func TestCleanUpUnit(t *testing.T) { + require.Equal(t, "", cleanUpUnit("")) + require.Equal(t, "a_b", cleanUpUnit("a b")) + require.Equal(t, "hello_world", cleanUpUnit("hello, world")) + require.Equal(t, "hello_you_2", cleanUpUnit("hello you 2")) + require.Equal(t, "1000", cleanUpUnit("$1000")) + require.Equal(t, "", cleanUpUnit("*+$^=)")) } func TestUnitMapGetOrDefault(t *testing.T) { @@ -179,17 +179,18 @@ func TestRemoveItem(t *testing.T) { require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a")) } -func TestBuildCompliantNameWithNormalize(t *testing.T) { +func TestBuildCompliantNameWithSuffixes(t *testing.T) { require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true)) require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) - require.Equal(t, "foo_bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) + require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) // Gauges with unit 1 are considered ratios. require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) // Slashes in units are converted. 
require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true)) + require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true)) } func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { @@ -202,4 +203,5 @@ func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) + require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false)) } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index fd7f58f073..f7fede258b 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -351,9 +351,17 @@ func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, exemplarRunes := 0 promExemplar := prompb.Exemplar{ - Value: exemplar.DoubleValue(), Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { val := hex.EncodeToString(traceID[:]) exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index a48a57b062..b22282097d 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -48,7 +48,6 @@ func TestCreateAttributes(t *testing.T) { resource.Attributes().PutStr(k, v) } attrs := pcommon.NewMap() - attrs.PutStr("__name__", "test_metric") attrs.PutStr("metric-attr", "metric value") testCases := []struct { @@ -162,7 +161,7 @@ func TestCreateAttributes(t *testing.T) { settings := Settings{ PromoteResourceAttributes: tc.promoteResourceAttributes, } - lbls := createAttributes(resource, attrs, settings, nil, false) + lbls := createAttributes(resource, attrs, settings, nil, false, model.MetricNameLabel, "test_metric") assert.ElementsMatch(t, lbls, tc.expectedLabels) }) @@ -406,3 +405,38 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { }) } } + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + 
exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go index e064ab28a2..5fdd26ef29 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -23,12 +23,13 @@ import ( "time" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -171,7 +172,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 wantDeltas: []int64{8, -7}, }, }, @@ -222,7 +223,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 wantDeltas: []int64{8, -8, 0, 1}, }, }, diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go index e932269644..b01d2cb1fe 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go @@ -22,10 +22,11 @@ import ( "time" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/prompb" ) func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) { diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index b1c8997268..9f27c333a6 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "strconv" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" @@ -407,7 +407,7 @@ type QueueManager struct { reshardDisableStartTimestamp atomic.Int64 // Time that reshard was disabled. 
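// The go-kit-to-slog migration in this file follows a single pattern, sketched
// here (promslog returns a standard *slog.Logger, so call sites use leveled
// methods with key/value pairs instead of level.X(logger).Log("msg", ...)):
//
//	logger := promslog.New(&promslog.Config{})
//	logger = logger.With(remoteName, client.Name(), endpoint, client.Endpoint())
//	logger.Info("Remote storage resharding", "from", t.numShards, "to", desiredShards)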
reshardDisableEndTimestamp atomic.Int64 // Time that reshard is disabled until. - logger log.Logger + logger *slog.Logger flushDeadline time.Duration cfg config.QueueConfig mcfg config.MetadataConfig @@ -454,7 +454,7 @@ func NewQueueManager( metrics *queueManagerMetrics, watcherMetrics *wlog.WatcherMetrics, readerMetrics *wlog.LiveReaderMetrics, - logger log.Logger, + logger *slog.Logger, dir string, samplesIn *ewmaRate, cfg config.QueueConfig, @@ -471,7 +471,7 @@ func NewQueueManager( protoMsg config.RemoteWriteProtoMsg, ) *QueueManager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } // Copy externalLabels into a slice, which we need for processExternalLabels. @@ -480,7 +480,7 @@ func NewQueueManager( extLabelsSlice = append(extLabelsSlice, l) }) - logger = log.With(logger, remoteName, client.Name(), endpoint, client.Endpoint()) + logger = logger.With(remoteName, client.Name(), endpoint, client.Endpoint()) t := &QueueManager{ logger: logger, flushDeadline: flushDeadline, @@ -526,7 +526,7 @@ func NewQueueManager( // ships them alongside series. If both mechanisms are set, the new one // takes precedence by implicitly disabling the older one. if t.mcfg.Send && t.protoMsg != config.RemoteWriteProtoMsgV1 { - level.Warn(logger).Log("msg", "usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") + logger.Warn("usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") t.mcfg.Send = false } @@ -567,7 +567,7 @@ func (t *QueueManager) AppendWatcherMetadata(ctx context.Context, metadata []scr err := t.sendMetadataWithBackoff(ctx, mm[i*t.mcfg.MaxSamplesPerSend:last], pBuf) if err != nil { t.metrics.failedMetadataTotal.Add(float64(last - (i * t.mcfg.MaxSamplesPerSend))) - level.Error(t.logger).Log("msg", "non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) + t.logger.Error("non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) } } } @@ -706,7 +706,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[s.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) + t.logger.Info("Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) t.metrics.droppedSamplesTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedSamplesTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -769,7 +769,7 @@ outer: // Track dropped exemplars in the same EWMA for sharding calc. 
t.dataDropped.incr(1) if _, ok := t.droppedSeries[e.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) + t.logger.Info("Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) t.metrics.droppedExemplarsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedExemplarsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -825,7 +825,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -880,7 +880,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -944,8 +944,8 @@ func (t *QueueManager) Start() { // Stop stops sending samples to the remote storage and waits for pending // sends to complete. func (t *QueueManager) Stop() { - level.Info(t.logger).Log("msg", "Stopping remote storage...") - defer level.Info(t.logger).Log("msg", "Remote storage stopped.") + t.logger.Info("Stopping remote storage...") + defer t.logger.Info("Remote storage stopped.") close(t.quit) t.wg.Wait() @@ -1093,10 +1093,10 @@ func (t *QueueManager) updateShardsLoop() { // to stay close to shardUpdateDuration. 
select { case t.reshardChan <- desiredShards: - level.Info(t.logger).Log("msg", "Remote storage resharding", "from", t.numShards, "to", desiredShards) + t.logger.Info("Remote storage resharding", "from", t.numShards, "to", desiredShards) t.numShards = desiredShards default: - level.Info(t.logger).Log("msg", "Currently resharding, skipping.") + t.logger.Info("Currently resharding, skipping.") } case <-t.quit: return @@ -1114,14 +1114,14 @@ func (t *QueueManager) shouldReshard(desiredShards int) bool { minSendTimestamp := time.Now().Add(-1 * shardUpdateDuration).Unix() lsts := t.lastSendTimestamp.Load() if lsts < minSendTimestamp { - level.Warn(t.logger).Log("msg", "Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) + t.logger.Warn("Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) return false } if disableTimestamp := t.reshardDisableEndTimestamp.Load(); time.Now().Unix() < disableTimestamp { disabledAt := time.Unix(t.reshardDisableStartTimestamp.Load(), 0) disabledFor := time.Until(time.Unix(disableTimestamp, 0)) - level.Warn(t.logger).Log("msg", "Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) + t.logger.Warn("Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) return false } return true @@ -1164,7 +1164,7 @@ func (t *QueueManager) calculateDesiredShards() int { desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup) ) t.metrics.desiredNumShards.Set(desiredShards) - level.Debug(t.logger).Log("msg", "QueueManager.calculateDesiredShards", + t.logger.Debug("QueueManager.calculateDesiredShards", "dataInRate", dataInRate, "dataOutRate", dataOutRate, "dataKeptRatio", dataKeptRatio, @@ -1182,7 +1182,7 @@ func (t *QueueManager) calculateDesiredShards() int { lowerBound = float64(t.numShards) * (1. - shardToleranceFraction) upperBound = float64(t.numShards) * (1. + shardToleranceFraction) ) - level.Debug(t.logger).Log("msg", "QueueManager.updateShardsLoop", + t.logger.Debug("QueueManager.updateShardsLoop", "lowerBound", lowerBound, "desiredShards", desiredShards, "upperBound", upperBound) desiredShards = math.Ceil(desiredShards) // Round up to be on the safe side. @@ -1193,7 +1193,7 @@ func (t *QueueManager) calculateDesiredShards() int { numShards := int(desiredShards) // Do not downshard if we are more than ten seconds back. if numShards < t.numShards && delay > 10.0 { - level.Debug(t.logger).Log("msg", "Not downsharding due to being too far behind") + t.logger.Debug("Not downsharding due to being too far behind") return t.numShards } @@ -1321,7 +1321,7 @@ func (s *shards) stop() { // Log error for any dropped samples, exemplars, or histograms. 
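// A worked example of the desiredShards formula logged above, with made-up
// numbers: if sending one sample costs timePerSample = 0.0002s, samples arrive
// at dataInRate = 100,000/s, dataKeptRatio = 1 and backlogCatchup = 0, then
// desiredShards = 0.0002 * (100000*1 + 0) = 20, i.e. twenty shards are needed
// just to keep pace with ingestion.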
logDroppedError := func(t string, counter atomic.Uint32) { if dropped := counter.Load(); dropped > 0 { - level.Error(s.qm.logger).Log("msg", fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) + s.qm.logger.Error(fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) } } logDroppedError("samples", s.samplesDroppedOnHardShutdown) @@ -1564,7 +1564,7 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) { nPendingSamples, nPendingExemplars, nPendingHistograms := populateTimeSeries(batch, pendingData, s.qm.sendExemplars, s.qm.sendNativeHistograms) n := nPendingSamples + nPendingExemplars + nPendingHistograms if timer { - level.Debug(s.qm.logger).Log("msg", "runShard timer ticked, sending buffered data", "samples", nPendingSamples, + s.qm.logger.Debug("runShard timer ticked, sending buffered data", "samples", nPendingSamples, "exemplars", nPendingExemplars, "shard", shardNum, "histograms", nPendingHistograms) } _ = s.sendSamples(ctx, pendingData[:n], nPendingSamples, nPendingExemplars, nPendingHistograms, pBuf, &buf, enc) @@ -1691,9 +1691,9 @@ func (s *shards) updateMetrics(_ context.Context, err error, sampleCount, exempl s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarDiff)) } if err != nil { - level.Error(s.qm.logger).Log("msg", "non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) + s.qm.logger.Error("non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) } else if sampleDiff+exemplarDiff+histogramDiff > 0 { - level.Error(s.qm.logger).Log("msg", "we got 2xx status code from the Receiver yet statistics indicate some dat was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) + s.qm.logger.Error("we got 2xx status code from the Receiver yet statistics indicate some data was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) } // These counters are used to calculate the dynamic sharding, and as such @@ -2018,16 +2018,16 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt switch { case backoffErr.retryAfter > 0: sleepDuration = backoffErr.retryAfter - level.Info(t.logger).Log("msg", "Retrying after duration specified by Retry-After header", "duration", sleepDuration) + t.logger.Info("Retrying after duration specified by Retry-After header", "duration", sleepDuration) case backoffErr.retryAfter < 0: - level.Debug(t.logger).Log("msg", "retry-after cannot be in past, retrying using default backoff mechanism") + t.logger.Debug("retry-after cannot be in past, retrying using default backoff mechanism") } // We should never reshard for a recoverable error; increasing shards could // make the problem worse, particularly if we're getting rate limited. // // reshardDisableTimestamp holds the unix timestamp until which resharding - // is diableld. We'll update that timestamp if the period we were just told + // is disabled. We'll update that timestamp if the period we were just told // to sleep for is newer than the existing disabled timestamp.
reshardWaitPeriod := time.Now().Add(time.Duration(sleepDuration) * 2) if oldTS, updated := setAtomicToNewer(&t.reshardDisableEndTimestamp, reshardWaitPeriod.Unix()); updated { @@ -2047,7 +2047,7 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt // If we make it this far, we've encountered a recoverable error and will retry. onRetry() - level.Warn(t.logger).Log("msg", "Failed to send batch, retrying", "err", err) + t.logger.Warn("Failed to send batch, retrying", "err", err) backoff = sleepDuration * 2 @@ -2147,12 +2147,12 @@ func compressPayload(tmpbuf *[]byte, inp []byte, enc Compression) (compressed [] } } -func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildWriteRequest(logger *slog.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildTimeSeries(timeSeries, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &prompb.WriteRequest{ @@ -2185,11 +2185,11 @@ func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metada return compressed, highest, lowest, nil } -func buildV2WriteRequest(logger log.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildV2WriteRequest(logger *slog.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildV2TimeSeries(samples, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &writev2.Request{ diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 032a1a92f7..4b7c5a4e90 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -28,13 +28,13 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" @@ -351,7 +351,7 @@ func TestMetadataDelivery(t *testing.T) { 
require.Equal(t, 0.0, client_testutil.ToFloat64(m.metrics.failedMetadataTotal)) require.Len(t, c.receivedMetadata, numMetadata) - // One more write than the rounded qoutient should be performed in order to get samples that didn't + // One more write than the rounded quotient should be performed in order to get samples that didn't // fit into MaxSamplesPerSend. require.Equal(t, numMetadata/config.DefaultMetadataConfig.MaxSamplesPerSend+1, c.writesReceived) // Make sure the last samples were sent. @@ -1326,21 +1326,25 @@ func BenchmarkSampleSend(b *testing.B) { cfg.MaxShards = 20 // todo: test with new proto type(s) - m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, config.RemoteWriteProtoMsgV1) - m.StoreSeries(series, 0) + for _, format := range []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2} { + b.Run(string(format), func(b *testing.B) { + m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, format) + m.StoreSeries(series, 0) - // These should be received by the client. - m.Start() - defer m.Stop() + // These should be received by the client. + m.Start() + defer m.Stop() - b.ResetTimer() - for i := 0; i < b.N; i++ { - m.Append(samples) - m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does - m.SeriesReset(i + 1) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Append(samples) + m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does + m.SeriesReset(i + 1) + } + // Do not include shutdown + b.StopTimer() + }) } - // Do not include shutdown - b.StopTimer() } // Check how long it takes to add N series, including external labels processing. @@ -1414,8 +1418,7 @@ func BenchmarkStartup(b *testing.B) { } sort.Ints(segments) - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout)) - logger = log.With(logger, "caller", log.DefaultCaller) + logger := promslog.New(&promslog.Config{}) cfg := testDefaultQueueConfig() mcfg := config.DefaultMetadataConfig @@ -1849,7 +1852,7 @@ func createDummyTimeSeries(instances int) []timeSeries { } func BenchmarkBuildWriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() + noopLogger := promslog.NewNopLogger() bench := func(b *testing.B, batch []timeSeries) { buff := make([]byte, 0) seriesBuff := make([]prompb.TimeSeries, len(batch)) @@ -1859,13 +1862,6 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } pBuf := proto.NewBuffer(nil) - // Warmup buffers - for i := 0; i < 10; i++ { - populateTimeSeries(batch, seriesBuff, true, true) - buildWriteRequest(noopLogger, seriesBuff, nil, pBuf, &buff, nil, "snappy") - } - - b.ResetTimer() totalSize := 0 for i := 0; i < b.N; i++ { populateTimeSeries(batch, seriesBuff, true, true) @@ -1896,46 +1892,44 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } func BenchmarkBuildV2WriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() - type testcase struct { - batch []timeSeries - } - testCases := []testcase{ - {createDummyTimeSeries(2)}, - {createDummyTimeSeries(10)}, - {createDummyTimeSeries(100)}, - } - for _, tc := range testCases { + noopLogger := promslog.NewNopLogger() + bench := func(b *testing.B, batch []timeSeries) { symbolTable := writev2.NewSymbolTable() buff := make([]byte, 0) - seriesBuff := make([]writev2.TimeSeries, len(tc.batch)) + seriesBuff := make([]writev2.TimeSeries, len(batch)) for i := range seriesBuff { seriesBuff[i].Samples = []writev2.Sample{{}} seriesBuff[i].Exemplars = []writev2.Exemplar{{}} } pBuf := []byte{} - // Warmup buffers - for i := 0; i 
< 10; i++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - } - - b.Run(fmt.Sprintf("%d-instances", len(tc.batch)), func(b *testing.B) { - totalSize := 0 - for j := 0; j < b.N; j++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - b.ResetTimer() - req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - if err != nil { - b.Fatal(err) - } - symbolTable.Reset() - totalSize += len(req) - b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + totalSize := 0 + for i := 0; i < b.N; i++ { + populateV2TimeSeries(&symbolTable, batch, seriesBuff, true, true) + req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") + if err != nil { + b.Fatal(err) } + totalSize += len(req) + b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + } } + + twoBatch := createDummyTimeSeries(2) + tenBatch := createDummyTimeSeries(10) + hundredBatch := createDummyTimeSeries(100) + + b.Run("2 instances", func(b *testing.B) { + bench(b, twoBatch) + }) + + b.Run("10 instances", func(b *testing.B) { + bench(b, tenBatch) + }) + + b.Run("100 instances", func(b *testing.B) { + bench(b, hundredBatch) + }) } func TestDropOldTimeSeries(t *testing.T) { diff --git a/storage/remote/read_handler.go b/storage/remote/read_handler.go index ffc64c9c3f..8f2945f974 100644 --- a/storage/remote/read_handler.go +++ b/storage/remote/read_handler.go @@ -16,13 +16,12 @@ package remote import ( "context" "errors" + "log/slog" "net/http" "slices" "strings" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" @@ -34,7 +33,7 @@ import ( ) type readHandler struct { - logger log.Logger + logger *slog.Logger queryable storage.SampleAndChunkQueryable config func() config.Config remoteReadSampleLimit int @@ -46,7 +45,7 @@ type readHandler struct { // NewReadHandler creates a http.Handler that accepts remote read requests and // writes them to the provided queryable.
-func NewReadHandler(logger log.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { +func NewReadHandler(logger *slog.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { h := &readHandler{ logger: logger, queryable: queryable, @@ -140,7 +139,7 @@ func (h *readHandler) remoteReadSamples( } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on querier close", "err", err.Error()) + h.logger.Warn("Error on querier close", "err", err.Error()) } }() @@ -163,7 +162,7 @@ func (h *readHandler) remoteReadSamples( return err } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on remote read query", "err", w.Error()) + h.logger.Warn("Warnings on remote read query", "err", w.Error()) } for _, ts := range resp.Results[i].Timeseries { ts.Labels = MergeLabels(ts.Labels, sortedExternalLabels) @@ -208,7 +207,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on chunk querier close", "err", err.Error()) + h.logger.Warn("Error on chunk querier close", "err", err.Error()) } }() @@ -239,7 +238,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on chunked remote read query", "warnings", w.Error()) + h.logger.Warn("Warnings on chunked remote read query", "warnings", w.Error()) } return nil }(); err != nil { diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index 4cd4647e72..fd7f3ad48d 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -334,7 +334,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type: prompb.Chunk_XOR, MinTimeMs: 7200000, MaxTimeMs: 7200000, - Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000"), }, }, }, @@ -381,7 +381,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type: prompb.Chunk_XOR, MinTimeMs: 14400000, MaxTimeMs: 14400000, - Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000"), }, }, }, diff --git a/storage/remote/read_test.go b/storage/remote/read_test.go index d63cefc3fe..b78a8c6215 100644 --- a/storage/remote/read_test.go +++ b/storage/remote/read_test.go @@ -475,7 +475,9 @@ func TestSampleAndChunkQueryableClient(t *testing.T) { ) q, err := c.Querier(tc.mint, tc.maxt) require.NoError(t, err) - defer require.NoError(t, q.Close()) + defer func() { + require.NoError(t, q.Close()) + }() ss := q.Select(context.Background(), true, nil, tc.matchers...) 
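// Why the defer above was wrapped in a closure: in Go, arguments to a deferred
// call are evaluated immediately at the defer statement, so
//
//	defer require.NoError(t, q.Close())
//
// would close the querier right away and defer only the assertion. The
// func() { require.NoError(t, q.Close()) }() form defers both the Close and
// the error check until the test returns.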
require.NoError(t, err) diff --git a/storage/remote/storage.go b/storage/remote/storage.go index 05634f1798..14c3c87d93 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -18,12 +18,13 @@ import ( "crypto/md5" "encoding/hex" "fmt" + "log/slog" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" @@ -51,8 +52,9 @@ type startTimeCallback func() (int64, error) // Storage represents all the remote read and write endpoints. It implements // storage.Storage. type Storage struct { - logger *logging.Deduper - mtx sync.Mutex + deduper *logging.Deduper + logger *slog.Logger + mtx sync.Mutex rws *WriteStorage @@ -62,14 +64,16 @@ type Storage struct { } // NewStorage returns a remote.Storage. -func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { +func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } - logger := logging.Dedupe(l, 1*time.Minute) + deduper := logging.Dedupe(l, 1*time.Minute) + logger := slog.New(deduper) s := &Storage{ logger: logger, + deduper: deduper, localStartTimeCallback: stCallback, } s.rws = NewWriteStorage(s.logger, reg, walDir, flushDeadline, sm, metadataInWAL) @@ -196,7 +200,7 @@ func (s *Storage) LowestSentTimestamp() int64 { // Close the background processing of the storage queues. func (s *Storage) Close() error { - s.logger.Stop() + s.deduper.Stop() s.mtx.Lock() defer s.mtx.Unlock() return s.rws.Close() diff --git a/storage/remote/write.go b/storage/remote/write.go index 3d2f1fdfcd..639f344520 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -17,13 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -57,7 +58,7 @@ var ( // WriteStorage represents all the remote write storage. type WriteStorage struct { - logger log.Logger + logger *slog.Logger reg prometheus.Registerer mtx sync.Mutex @@ -78,9 +79,9 @@ type WriteStorage struct { } // NewWriteStorage creates and runs a WriteStorage. 
-func NewWriteStorage(logger log.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { +func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } rws := &WriteStorage{ queues: make(map[string]*QueueManager), @@ -277,6 +278,7 @@ func (rws *WriteStorage) Close() error { type timestampTracker struct { writeStorage *WriteStorage + appendOptions *storage.AppendOptions samples int64 exemplars int64 histograms int64 @@ -284,6 +286,10 @@ type timestampTracker struct { highestRecvTimestamp *maxTimestamp } +func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) { + t.appendOptions = opts +} + // Append implements storage.Appender. func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) { t.samples++ @@ -306,14 +312,29 @@ func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { - // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. - // UpadteMetadata is no-op for remote write (where timestampTracker is being used) for now. +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) { + t.samples++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } return 0, nil } -func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { - // AppendCTZeroSample is no-op for remote-write for now. +func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { + t.histograms++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } + return 0, nil +} + +func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { + // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. + // UpdateMetadata is no-op for remote write (where timestampTracker is being used) for now. 
return 0, nil } diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 736bc8eff3..466673c99d 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -18,12 +18,11 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" @@ -42,7 +41,7 @@ import ( ) type writeHandler struct { - logger log.Logger + logger *slog.Logger appendable storage.Appendable samplesWithInvalidLabelsTotal prometheus.Counter @@ -58,7 +57,7 @@ const maxAheadTime = 10 * time.Minute // // NOTE(bwplotka): When accepting v2 proto and spec, partial writes are possible // as per https://prometheus.io/docs/specs/remote_write_spec_2_0/#partial-write. -func NewWriteHandler(logger log.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { +func NewWriteHandler(logger *slog.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { protoMsgs := map[config.RemoteWriteProtoMsg]struct{}{} for _, acc := range acceptedProtoMsgs { protoMsgs[acc] = struct{}{} @@ -119,7 +118,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { msgType, err := h.parseProtoMsg(contentType) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) return } @@ -131,7 +130,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } return ret }()) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } @@ -142,14 +141,14 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // We could give http.StatusUnsupportedMediaType, but let's assume snappy by default. } else if enc != string(SnappyBlockCompression) { err := fmt.Errorf("%v encoding (compression) is not accepted by this server; only %v is acceptable", enc, SnappyBlockCompression) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } // Read the request body. body, err := io.ReadAll(r.Body) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -157,7 +156,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { decompressed, err := snappy.Decode(nil, body) if err != nil { // TODO(bwplotka): Add more context to responded error? 
- level.Error(h.logger).Log("msg", "Error decompressing remote write request", "err", err.Error()) + h.logger.Error("Error decompressing remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -169,7 +168,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req prompb.WriteRequest if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -180,7 +179,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error while remote writing the v1 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v1 request", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -193,7 +192,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req writev2.Request if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -205,7 +204,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err != nil { if errHTTPCode/5 == 100 { // 5xx - level.Error(h.logger).Log("msg", "Error while remote writing the v2 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v2 request", "err", err.Error()) } http.Error(w, err.Error(), errHTTPCode) return @@ -241,11 +240,11 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are // potentially written. Perhaps unify with fixed writeV2 implementation a bit. 
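The `errHTTPCode/5 == 100` guard above is a terse way to test for the 5xx codes this handler can emit: integer division by 5 yields 100 exactly for 500 through 504. A quick check:

```go
package main

import "fmt"

func main() {
	// 500/5 == 100 and 504/5 == 100, while 499/5 == 99 and 505/5 == 101,
	// so the guard matches precisely the codes 500-504.
	for code := 498; code <= 506; code++ {
		fmt.Printf("%d -> %v\n", code, code/5 == 100)
	}
}
```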
if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { - level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) + h.logger.Warn("Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ continue } else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate { - level.Warn(h.logger).Log("msg", "Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) + h.logger.Warn("Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) samplesWithInvalidLabels++ continue } @@ -261,10 +260,10 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err switch { case errors.Is(err, storage.ErrOutOfOrderExemplar): outOfOrderExemplarErrs++ - level.Debug(h.logger).Log("msg", "Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Debug("Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) default: // Since exemplar storage is still experimental, we don't fail the request on ingestion errors - level.Debug(h.logger).Log("msg", "Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) + h.logger.Debug("Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) } } } @@ -276,7 +275,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err } if outOfOrderExemplarErrs > 0 { - _ = level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } if samplesWithInvalidLabels > 0 { h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -293,7 +292,7 @@ func (h *writeHandler) appendV1Samples(app storage.Appender, ss []prompb.Sample, if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) } return err } @@ -315,7 +314,7 @@ func (h *writeHandler) appendV1Histograms(app storage.Appender, hh []prompb.Hist if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) } return err } @@ -345,7 +344,7 @@ func (h *writeHandler) writeV2(ctx context.Context, req *writev2.Request) (_ Wri // On 5xx, we always rollback, because we expect // sender to retry and TSDB is not idempotent. 
if rerr := app.Rollback(); rerr != nil { - level.Error(h.logger).Log("msg", "writev2 rollback failed on retry-able error", "err", rerr) + h.logger.Error("writev2 rollback failed on retry-able error", "err", rerr) } return WriteResponseStats{}, errHTTPCode, err } @@ -407,7 +406,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrDuplicateSampleForTimestamp) || errors.Is(err, storage.ErrTooOldSample) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -432,7 +431,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -450,18 +449,18 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // Handle append error. if errors.Is(err, storage.ErrOutOfOrderExemplar) { outOfOrderExemplarErrs++ // Maintain old metrics, but technically not needed, given we fail here. - level.Error(h.logger).Log("msg", "Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } // TODO(bwplotka): Add strict mode which would trigger rollback of everything if needed. // For now we keep the previously released flow (just error, not debug level) of dropping them without rollback and 5xx. - level.Error(h.logger).Log("msg", "failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) } m := ts.ToMetadata(req.Symbols) if _, err = app.UpdateMetadata(ref, ls, m); err != nil { - level.Debug(h.logger).Log("msg", "error while updating metadata from remote write", "err", err) + h.logger.Debug("error while updating metadata from remote write", "err", err) // Metadata is attached to each series, so since Prometheus does not reject samples without metadata information, // we don't report remote write error either. We increment metric instead.
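appendV2 accumulates per-series failures with the `%w` verb, which keeps the sentinel errors unwrappable; assuming the collected slice is ultimately combined with the standard library's errors.Join (the combination step is not shown in this hunk), callers can still classify the result:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/prometheus/storage"
)

func main() {
	var badRequestErrs []error
	// The %w verb wraps the sentinel, as in the appendV2 hunk above;
	// the series string is illustrative.
	badRequestErrs = append(badRequestErrs,
		fmt.Errorf("%w for series %v", storage.ErrOutOfOrderSample, `{__name__="test_metric"}`))

	// errors.Join preserves wrapped sentinels, so the aggregated
	// bad-request error remains classifiable with errors.Is.
	err := errors.Join(badRequestErrs...)
	fmt.Println(errors.Is(err, storage.ErrOutOfOrderSample)) // true
}
```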
samplesWithoutMetadata += rs.AllSamples() - allSamplesSoFar @@ -469,7 +468,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * } if outOfOrderExemplarErrs > 0 { - level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -482,7 +481,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // NewOTLPWriteHandler creates a http.Handler that accepts OTLP write requests and // writes them to the provided appendable. -func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, configFunc func() config.Config) http.Handler { +func NewOTLPWriteHandler(logger *slog.Logger, appendable storage.Appendable, configFunc func() config.Config) http.Handler { rwHandler := &writeHandler{ logger: logger, appendable: appendable, @@ -496,7 +495,7 @@ func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, confi } type otlpWriteHandler struct { - logger log.Logger + logger *slog.Logger rwHandler *writeHandler configFunc func() config.Config } @@ -504,7 +503,7 @@ type otlpWriteHandler struct { func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { req, err := DecodeOTLPWriteRequest(r) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -517,11 +516,11 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, }) if err != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) + h.logger.Warn("Error translating OTLP metrics to Prometheus write request", "err", err) } ws, _ := annots.AsStrings("", 0, 0) if len(ws) > 0 { - level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + h.logger.Warn("Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ @@ -535,7 +534,7 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error appending remote write", "err", err.Error()) + h.logger.Error("Error appending remote write", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index 5c89a1ab95..580c7c143e 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -27,11 +27,12 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -129,7 +130,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, 
[]config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -144,7 +145,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) for _, tc := range []struct { @@ -230,7 +231,7 @@ func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -255,7 +256,7 @@ func TestRemoteWriteHandler_V1Message(t *testing.T) { // in Prometheus, so keeping like this to not break existing 1.0 clients. appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -428,7 +429,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -445,7 +446,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { appendExemplarErr: tc.appendExemplarErr, updateMetadataErr: tc.updateMetadataErr, } - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -544,7 +545,7 @@ func TestOutOfOrderSample_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -586,7 +587,7 @@ func TestOutOfOrderExemplar_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, 
[]config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -624,7 +625,7 @@ func TestOutOfOrderHistogram_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -655,7 +656,7 @@ func BenchmarkRemoteWriteHandler(b *testing.B) { appendable := &mockAppendable{} // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() b.ResetTimer() @@ -672,7 +673,7 @@ func TestCommitErr_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -685,7 +686,7 @@ func TestCommitErr_V1Message(t *testing.T) { } func TestCommitErr_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -696,7 +697,7 @@ func TestCommitErr_V2Message(t *testing.T) { req.Header.Set(RemoteWriteVersionHeader, RemoteWriteVersion20HeaderValue) appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -723,7 +724,7 @@ func BenchmarkRemoteWriteOOOSamples(b *testing.B) { require.NoError(b, db.Close()) }) // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) buf, _, _, err := buildWriteRequest(nil, genSeriesWithSample(1000, 200*time.Minute.Milliseconds()), nil, nil, nil, nil, "snappy") require.NoError(b, err) @@ -832,6 +833,10 @@ func (m *mockAppendable) Appender(_ context.Context) storage.Appender { return m } +func (m *mockAppendable) SetOptions(opts *storage.AppendOptions) { + panic("unimplemented") +} + func (m 
*mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if m.appendSampleErr != nil { return 0, m.appendSampleErr } @@ -915,6 +920,13 @@ func (m *mockAppendable) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t return 0, nil } +func (m *mockAppendable) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + // AppendHistogramCTZeroSample is a no-op for remote-write for now. + // TODO(bwplotka/arthursens): Add support for PRW 2.0 for CT zero feature (but also we might + // replace this with in-metadata CT storage, see https://github.com/prometheus/prometheus/issues/14218). + return 0, nil +} + func (m *mockAppendable) UpdateMetadata(_ storage.SeriesRef, l labels.Labels, mp metadata.Metadata) (storage.SeriesRef, error) { if m.updateMetadataErr != nil { return 0, m.updateMetadataErr diff --git a/storage/series.go b/storage/series.go index 70e3d0a199..a3dbec7088 100644 --- a/storage/series.go +++ b/storage/series.go @@ -171,6 +171,34 @@ func (it *listSeriesIterator) Seek(t int64) chunkenc.ValueType { func (it *listSeriesIterator) Err() error { return nil } +type listSeriesIteratorWithCopy struct { + *listSeriesIterator +} + +func NewListSeriesIteratorWithCopy(samples Samples) chunkenc.Iterator { + return &listSeriesIteratorWithCopy{ + listSeriesIterator: &listSeriesIterator{samples: samples, idx: -1}, + } +} + +func (it *listSeriesIteratorWithCopy) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { + t, ih := it.listSeriesIterator.AtHistogram(nil) + if h == nil || ih == nil { + return t, ih + } + ih.CopyTo(h) + return t, h +} + +func (it *listSeriesIteratorWithCopy) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + t, ih := it.listSeriesIterator.AtFloatHistogram(nil) + if fh == nil || ih == nil { + return t, ih + } + ih.CopyTo(fh) + return t, fh +} + type listChunkSeriesIterator struct { chks []chunks.Meta idx int diff --git a/tracing/tracing.go b/tracing/tracing.go index 6b9319ecbd..4fdedf505b 100644 --- a/tracing/tracing.go +++ b/tracing/tracing.go @@ -16,11 +16,10 @@ package tracing import ( "context" "fmt" + "log/slog" "reflect" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/version" "go.opentelemetry.io/otel" @@ -43,14 +42,14 @@ const serviceName = "prometheus" // Manager is capable of building, (re)installing and shutting down // the tracer provider. type Manager struct { - logger log.Logger + logger *slog.Logger done chan struct{} config config.TracingConfig shutdownFunc func() error } // NewManager creates a new tracing manager.
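The new listSeriesIteratorWithCopy exists because AtHistogram and AtFloatHistogram may return pointers into iterator-owned memory; a caller that retains the value across Next calls needs a deep copy, which CopyTo provides. A small sketch of the aliasing hazard it guards against:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/histogram"
)

func main() {
	src := &histogram.Histogram{Count: 42, Sum: 3.14}

	// Aliasing: holding the returned pointer shares memory the iterator
	// may overwrite on its next step.
	aliased := src

	// Deep copy: what listSeriesIteratorWithCopy.AtHistogram does on the
	// caller's behalf via CopyTo.
	safe := &histogram.Histogram{}
	src.CopyTo(safe)

	src.Count = 0 // simulate the iterator reusing its buffer
	fmt.Println(aliased.Count, safe.Count) // 0 42
}
```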
-func NewManager(logger log.Logger) *Manager { +func NewManager(logger *slog.Logger) *Manager { return &Manager{ logger: logger, done: make(chan struct{}), @@ -62,7 +61,7 @@ func NewManager(logger log.Logger) *Manager { func (m *Manager) Run() { otel.SetTextMapPropagator(propagation.TraceContext{}) otel.SetErrorHandler(otelErrHandler(func(err error) { - level.Error(m.logger).Log("msg", "OpenTelemetry handler returned an error", "err", err) + m.logger.Error("OpenTelemetry handler returned an error", "err", err.Error()) })) <-m.done } @@ -89,7 +88,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig m.shutdownFunc = nil otel.SetTracerProvider(noop.NewTracerProvider()) - level.Info(m.logger).Log("msg", "Tracing provider uninstalled.") + m.logger.Info("Tracing provider uninstalled.") return nil } @@ -102,7 +101,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig otel.SetTracerProvider(tp) - level.Info(m.logger).Log("msg", "Successfully installed a new tracer provider.") + m.logger.Info("Successfully installed a new tracer provider.") return nil } @@ -115,10 +114,10 @@ func (m *Manager) Stop() { } if err := m.shutdownFunc(); err != nil { - level.Error(m.logger).Log("msg", "failed to shut down the tracer provider", "err", err) + m.logger.Error("failed to shut down the tracer provider", "err", err) } - level.Info(m.logger).Log("msg", "Tracing manager stopped") + m.logger.Info("Tracing manager stopped") } type otelErrHandler func(err error) diff --git a/tracing/tracing_test.go b/tracing/tracing_test.go index b7996c6104..e735e1a18a 100644 --- a/tracing/tracing_test.go +++ b/tracing/tracing_test.go @@ -16,8 +16,8 @@ package tracing import ( "testing" - "github.com/go-kit/log" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace/noop" @@ -28,7 +28,7 @@ import ( func TestInstallingNewTracerProvider(t *testing.T) { tpBefore := otel.GetTracerProvider() - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -41,7 +41,7 @@ func TestInstallingNewTracerProvider(t *testing.T) { } func TestReinstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -76,7 +76,7 @@ func TestReinstallingTracerProvider(t *testing.T) { } func TestReinstallingTracerProviderWithTLS(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -96,7 +96,7 @@ func TestReinstallingTracerProviderWithTLS(t *testing.T) { } func TestUninstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -118,7 +118,7 @@ func TestUninstallingTracerProvider(t *testing.T) { } func TestTracerProviderShutdown(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 9697739e00..3863e6cd99 100644 --- a/tsdb/agent/db.go +++ 
b/tsdb/agent/db.go @@ -17,14 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "path/filepath" "sync" "time" "unicode/utf8" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "go.uber.org/atomic" @@ -226,7 +225,7 @@ func (m *dbMetrics) Unregister() { // DB represents a WAL-only storage. It implements storage.DB. type DB struct { mtx sync.RWMutex - logger log.Logger + logger *slog.Logger opts *Options rs *remote.Storage @@ -251,7 +250,7 @@ type DB struct { } // Open returns a new agent.DB in the given directory. -func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { +func Open(l *slog.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { opts = validateOptions(opts) locker, err := tsdbutil.NewDirLocker(dir, "agent", l, reg) @@ -306,11 +305,11 @@ func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir strin } if err := db.replayWAL(); err != nil { - level.Warn(db.logger).Log("msg", "encountered WAL read error, attempting repair", "err", err) + db.logger.Warn("encountered WAL read error, attempting repair", "err", err) if err := w.Repair(err); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "successfully repaired WAL") + db.logger.Info("successfully repaired WAL") } go db.run() @@ -335,7 +334,7 @@ func validateOptions(opts *Options) *Options { opts.WALCompression = wlog.CompressionNone } - // Revert Stripesize to DefaultStripsize if Stripsize is either 0 or not a power of 2. + // Revert StripeSize to DefaultStripeSize if StripeSize is either 0 or not a power of 2. if opts.StripeSize <= 0 || ((opts.StripeSize & (opts.StripeSize - 1)) != 0) { opts.StripeSize = tsdb.DefaultStripeSize } @@ -359,7 +358,7 @@ func validateOptions(opts *Options) *Options { } func (db *DB) replayWAL() error { - level.Info(db.logger).Log("msg", "replaying WAL, this may take a while", "dir", db.wal.Dir()) + db.logger.Info("replaying WAL, this may take a while", "dir", db.wal.Dir()) start := time.Now() dir, startFrom, err := wlog.LastCheckpoint(db.wal.Dir()) @@ -376,7 +375,7 @@ func (db *DB) replayWAL() error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -386,7 +385,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("backfill checkpoint: %w", err) } startFrom++ - level.Info(db.logger).Log("msg", "WAL checkpoint loaded") + db.logger.Info("WAL checkpoint loaded") } // Find the last segment. @@ -395,7 +394,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("finding WAL segments: %w", err) } - // Backfil segments from the most recent checkpoint onwards. + // Backfill segments from the most recent checkpoint onwards. 
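The StripeSize validation mentioned in the corrected comment relies on a standard bit trick: for n > 0, n & (n-1) clears the lowest set bit, so the result is zero exactly when n has a single set bit, i.e. is a power of two:

```go
package main

import "fmt"

// isPowerOfTwo mirrors the validateOptions check:
// opts.StripeSize <= 0 || ((opts.StripeSize & (opts.StripeSize - 1)) != 0).
func isPowerOfTwo(n int) bool {
	// n&(n-1) drops the lowest set bit; only powers of two have one set bit.
	return n > 0 && n&(n-1) == 0
}

func main() {
	for _, n := range []int{0, 1, 3, 4, 16384, 16385} {
		fmt.Println(n, isPowerOfTwo(n))
	}
}
```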
for i := startFrom; i <= last; i++ { seg, err := wlog.OpenReadSegment(wlog.SegmentName(db.wal.Dir(), i)) if err != nil { @@ -405,12 +404,12 @@ func (db *DB) replayWAL() error { sr := wlog.NewSegmentBufReader(seg) err = db.loadWAL(wlog.NewReader(sr), multiRef) if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(db.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last) + db.logger.Info("WAL segment loaded", "segment", i, "maxSegment", last) } walReplayDuration := time.Since(start) @@ -571,7 +570,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } if v := nonExistentSeriesRefs.Load(); v > 0 { - level.Warn(db.logger).Log("msg", "found sample referencing non-existing series", "skipped_series", v) + db.logger.Warn("found sample referencing non-existing series", "skipped_series", v) } db.nextRef.Store(uint64(lastRef)) @@ -616,9 +615,9 @@ Loop: ts = maxTS } - level.Debug(db.logger).Log("msg", "truncating the WAL", "ts", ts) + db.logger.Debug("truncating the WAL", "ts", ts) if err := db.truncate(ts); err != nil { - level.Warn(db.logger).Log("msg", "failed to truncate WAL", "err", err) + db.logger.Warn("failed to truncate WAL", "err", err) } } } @@ -631,7 +630,7 @@ func (db *DB) truncate(mint int64) error { start := time.Now() db.gc(mint) - level.Info(db.logger).Log("msg", "series GC completed", "duration", time.Since(start)) + db.logger.Info("series GC completed", "duration", time.Since(start)) first, last, err := wlog.Segments(db.wal.Dir()) if err != nil { @@ -679,7 +678,7 @@ func (db *DB) truncate(mint int64) error { // If truncating fails, we'll just try it again at the next checkpoint. // Leftover segments will still just be ignored in the future if there's a // checkpoint that supersedes them. - level.Error(db.logger).Log("msg", "truncating segments failed", "err", err) + db.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it are truncated, so we @@ -696,13 +695,13 @@ func (db *DB) truncate(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. They will just be ignored since a newer checkpoint // exists. - level.Error(db.logger).Log("msg", "delete old checkpoints", "err", err) + db.logger.Error("delete old checkpoints", "err", err) db.metrics.checkpointDeleteFail.Inc() } db.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(db.logger).Log("msg", "WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) + db.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil } @@ -764,6 +763,7 @@ func (db *DB) Close() error { type appender struct { *DB + hints *storage.AppendOptions pendingSeries []record.RefSeries pendingSamples []record.RefSample @@ -784,6 +784,10 @@ type appender struct { floatHistogramSeries []*memSeries } +func (a *appender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts +} + func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { // series references and chunk references are identical for agent mode. 
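The SetOptions method added to the agent appender above just stashes caller-provided hints for later appends. A hedged sketch of that flow; the DiscardOutOfOrder field is an assumed example of what storage.AppendOptions might carry and does not appear in this diff:

```go
package main

import "fmt"

// AppendOptions stands in for storage.AppendOptions; the field below is
// an assumption for illustration only.
type AppendOptions struct {
	DiscardOutOfOrder bool
}

// appender mirrors the agent appender: SetOptions only records the hints.
type appender struct {
	hints *AppendOptions
}

func (a *appender) SetOptions(opts *AppendOptions) { a.hints = opts }

func main() {
	a := &appender{}
	a.SetOptions(&AppendOptions{DiscardOutOfOrder: true})
	fmt.Println(a.hints.DiscardOutOfOrder) // true
}
```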
headRef := chunks.HeadSeriesRef(ref) @@ -977,9 +981,134 @@ func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Met return 0, nil } -func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { - // TODO(bwplotka): Wire metadata in the Agent's appender. - return 0, nil +func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if err := h.Validate(); err != nil { + return 0, err + } + } + if fh != nil { + if err := fh.Validate(); err != nil { + return 0, err + } + } + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + // Ensure no empty labels have gotten through. + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + var created bool + series, created = a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: series.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + } + + series.Lock() + defer series.Unlock() + + if ct <= a.minValidTime(series.lastTs) { + return 0, storage.ErrOutOfOrderCT + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{ + Ref: series.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, series) + case fh != nil: + a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{ + Ref: series.ref, + T: ct, + FH: &histogram.FloatHistogram{}, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, series) + } + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + return storage.SeriesRef(series.ref), nil +} + +func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + newSeries, created := a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: newSeries.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + + series = newSeries + } + + series.Lock() + defer series.Unlock() + + if t <= a.minValidTime(series.lastTs) { + a.metrics.totalOutOfOrderSamples.Inc() + return 0, storage.ErrOutOfOrderSample + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + // NOTE: always modify pendingSamples and sampleSeries together. 
+ a.pendingSamples = append(a.pendingSamples, record.RefSample{ + Ref: series.ref, + T: ct, + V: 0, + }) + a.sampleSeries = append(a.sampleSeries, series) + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + + return storage.SeriesRef(series.ref), nil } // Commit submits the collected samples and purges the batch. diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index b31041b1b9..b28c29095c 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -15,21 +15,23 @@ package agent import ( "context" + "errors" "fmt" + "io" "math" "path/filepath" "strconv" "testing" "time" - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -89,12 +91,12 @@ func createTestAgentDB(t testing.TB, reg prometheus.Registerer, opts *Options) * t.Helper() dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) t.Cleanup(func() { require.NoError(t, rs.Close()) }) - db, err := Open(log.NewNopLogger(), reg, rs, dbDir, opts) + db, err := Open(promslog.NewNopLogger(), reg, rs, dbDir, opts) require.NoError(t, err) return db } @@ -583,7 +585,7 @@ func TestWALReplay(t *testing.T) { func TestLockfile(t *testing.T) { tsdbutil.TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*tsdbutil.DirLocker, testutil.Closer) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() rs := remote.NewStorage(logger, reg, startTime, data, time.Second*30, nil, false) t.Cleanup(func() { @@ -605,12 +607,12 @@ func TestLockfile(t *testing.T) { func Test_ExistingWAL_NextRef(t *testing.T) { dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) defer func() { require.NoError(t, rs.Close()) }() - db, err := Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err := Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) seriesCount := 10 @@ -638,9 +640,11 @@ func Test_ExistingWAL_NextRef(t *testing.T) { require.NoError(t, db.Close()) // Create a new storage and see what nextRef is initialized to. 
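Stepping back to the agent appender's created-timestamp methods added above: a CT zero sample is a synthetic (ct, 0) point that must be strictly older than its sample and strictly newer than anything the series already holds. A toy model of the guard order (not the real appender, which also locks the series and emits WAL records):

```go
package main

import (
	"errors"
	"fmt"
)

var (
	errCTNewerThanSample = errors.New("CT is not older than the sample timestamp")
	errOutOfOrderCT      = errors.New("created timestamp out of order")
)

type series struct{ lastTs int64 }

// appendCTZeroSample models the guards used by the agent appender:
// reject ct >= t first, then require ct to advance the series,
// then record the synthetic (ct, 0) point.
func (s *series) appendCTZeroSample(t, ct int64) (int64, float64, error) {
	if ct >= t {
		return 0, 0, errCTNewerThanSample
	}
	if ct <= s.lastTs {
		return 0, 0, errOutOfOrderCT
	}
	s.lastTs = ct
	return ct, 0, nil
}

func main() {
	s := &series{}
	fmt.Println(s.appendCTZeroSample(100, 1))   // 1 0 <nil>: zero sample injected
	fmt.Println(s.appendCTZeroSample(101, 1))   // duplicate CT: out of order
	fmt.Println(s.appendCTZeroSample(102, 102)) // CT not older than sample
}
```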
- db, err = Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err = Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) - defer require.NoError(t, db.Close()) + defer func() { + require.NoError(t, db.Close()) + }() require.Equal(t, uint64(seriesCount+histogramCount), db.nextRef.Load(), "nextRef should be equal to the number of series written across the entire WAL") } @@ -932,6 +936,249 @@ func TestDBOutOfOrderTimeWindow(t *testing.T) { } } +type walSample struct { + t int64 + f float64 + h *histogram.Histogram + lbls labels.Labels + ref storage.SeriesRef +} + +func TestDBCreatedTimestampSamplesIngestion(t *testing.T) { + t.Parallel() + + type appendableSample struct { + t int64 + ct int64 + v float64 + lbls labels.Labels + h *histogram.Histogram + expectsError bool + } + + testHistogram := tsdbutil.GenerateTestHistograms(1)[0] + zeroHistogram := &histogram.Histogram{} + + lbls := labelsForTest(t.Name(), 1) + defLbls := labels.New(lbls[0]...) + + testCases := []struct { + name string + inputSamples []appendableSample + expectedSamples []*walSample + expectedSeriesCount int + }{ + { + name: "in order ct+normal sample/floatSamples", + inputSamples: []appendableSample{ + {t: 100, ct: 1, v: 10, lbls: defLbls}, + {t: 101, ct: 1, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, f: 0, lbls: defLbls}, + {t: 100, f: 10, lbls: defLbls}, + {t: 101, f: 10, lbls: defLbls}, + }, + }, + { + name: "CT+float && CT+histogram samples", + inputSamples: []appendableSample{ + { + t: 100, + ct: 30, + v: 20, + lbls: defLbls, + }, + { + t: 300, + ct: 230, + h: testHistogram, + lbls: defLbls, + }, + }, + expectedSamples: []*walSample{ + {t: 30, f: 0, lbls: defLbls}, + {t: 100, f: 20, lbls: defLbls}, + {t: 230, h: zeroHistogram, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 1, + }, + { + name: "CT+float && CT+histogram samples with error", + inputSamples: []appendableSample{ + { + // invalid CT + t: 100, + ct: 100, + v: 10, + lbls: defLbls, + expectsError: true, + }, + { + // invalid CT histogram + t: 300, + ct: 300, + h: testHistogram, + lbls: defLbls, + expectsError: true, + }, + }, + expectedSamples: []*walSample{ + {t: 100, f: 10, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 0, + }, + { + name: "In order ct+normal sample/histogram", + inputSamples: []appendableSample{ + {t: 100, h: testHistogram, ct: 1, lbls: defLbls}, + {t: 101, h: testHistogram, ct: 1, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, h: &histogram.Histogram{}}, + {t: 100, h: testHistogram}, + {t: 101, h: &histogram.Histogram{CounterResetHint: histogram.NotCounterReset}}, + }, + }, + { + name: "ct+normal then OOO sample/float", + inputSamples: []appendableSample{ + {t: 60_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 120_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 180_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 50_000, ct: 40_000, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 40_000, f: 0, lbls: defLbls}, + {t: 50_000, f: 10, lbls: defLbls}, + {t: 60_000, f: 10, lbls: defLbls}, + {t: 120_000, f: 10, lbls: defLbls}, + {t: 180_000, f: 10, lbls: defLbls}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 360_000 + s := createTestAgentDB(t, reg, opts) + app := s.Appender(context.TODO()) + + for _, sample := range 
tc.inputSamples { + // We're supposed to write a Histogram to the WAL + if sample.h != nil { + _, err := app.AppendHistogramCTZeroSample(0, sample.lbls, sample.t, sample.ct, zeroHistogram, nil) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.AppendHistogram(0, sample.lbls, sample.t, sample.h, nil) + require.NoError(t, err) + } else { + // We're supposed to write a float sample to the WAL + _, err := app.AppendCTZeroSample(0, sample.lbls, sample.t, sample.ct) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.Append(0, sample.lbls, sample.t, sample.v) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + // Close the DB to ensure all data is flushed to the WAL + require.NoError(t, s.Close()) + + // Check that we don't have any OOO samples in the WAL by checking metrics + families, err := reg.Gather() + require.NoError(t, err, "failed to gather metrics") + for _, f := range families { + if f.GetName() == "prometheus_agent_out_of_order_samples_total" { + t.Fatalf("unexpected metric %s", f.GetName()) + } + } + + outputSamples := readWALSamples(t, s.wal.Dir()) + + require.Equal(t, len(tc.expectedSamples), len(outputSamples), "Expected %d samples", len(tc.expectedSamples)) + + for i, expectedSample := range tc.expectedSamples { + for _, sample := range outputSamples { + if sample.t == expectedSample.t && sample.lbls.String() == expectedSample.lbls.String() { + if expectedSample.h != nil { + require.Equal(t, expectedSample.h, sample.h, "histogram value mismatch (sample index %d)", i) + } else { + require.Equal(t, expectedSample.f, sample.f, "value mismatch (sample index %d)", i) + } + } + } + } + }) + } +} + +func readWALSamples(t *testing.T, walDir string) []*walSample { + t.Helper() + sr, err := wlog.NewSegmentsReader(walDir) + require.NoError(t, err) + defer func(sr io.ReadCloser) { + err := sr.Close() + require.NoError(t, err) + }(sr) + + r := wlog.NewReader(sr) + dec := record.NewDecoder(labels.NewSymbolTable()) + + var ( + samples []record.RefSample + histograms []record.RefHistogramSample + + lastSeries record.RefSeries + outputSamples = make([]*walSample, 0) + ) + + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + lastSeries = series[0] + case record.Samples: + samples, err = dec.Samples(rec, samples[:0]) + require.NoError(t, err) + for _, s := range samples { + outputSamples = append(outputSamples, &walSample{ + t: s.T, + f: s.V, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + case record.HistogramSamples: + histograms, err = dec.HistogramSamples(rec, histograms[:0]) + require.NoError(t, err) + for _, h := range histograms { + outputSamples = append(outputSamples, &walSample{ + t: h.T, + h: h.H, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + } + } + + return outputSamples +} + func BenchmarkCreateSeries(b *testing.B) { s := createTestAgentDB(b, nil, DefaultOptions()) defer s.Close() diff --git a/tsdb/block.go b/tsdb/block.go index 2f32733f8c..48ba4588aa 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -20,15 +20,16 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "sync" - "github.com/go-kit/log" -
"github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -265,7 +266,7 @@ func readMetaFile(dir string) (*BlockMeta, int64, error) { return &m, int64(len(b)), nil } -func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error) { +func writeMetaFile(logger *slog.Logger, dir string, meta *BlockMeta) (int64, error) { meta.Version = metaVersion1 // Make any changes to the file appear atomic. @@ -273,7 +274,7 @@ func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error tmp := path + ".tmp" defer func() { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() @@ -319,7 +320,7 @@ type Block struct { indexr IndexReader tombstones tombstones.Reader - logger log.Logger + logger *slog.Logger numBytesChunks int64 numBytesIndex int64 @@ -329,9 +330,9 @@ type Block struct { // OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used // to instantiate chunk structs. -func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { +func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var closers []io.Closer defer func() { diff --git a/tsdb/block_test.go b/tsdb/block_test.go index f2569e35be..3589b42c17 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -22,12 +22,13 @@ import ( "math/rand" "os" "path/filepath" + "slices" "sort" "strconv" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -46,7 +47,7 @@ import ( func TestBlockMetaMustNeverBeVersion2(t *testing.T) { dir := t.TempDir() - _, err := writeMetaFile(log.NewNopLogger(), dir, &BlockMeta{}) + _, err := writeMetaFile(promslog.NewNopLogger(), dir, &BlockMeta{}) require.NoError(t, err) meta, _, err := readMetaFile(dir) @@ -151,7 +152,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Truncate(fi.Size()-1)) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:26, available:25"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:25, available:24"), }, { name: "checksum mismatch", @@ -169,7 +170,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.Equal(t, 1, n) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:cfc0526c, actual:34815eae"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:231bddcf, actual:d85ad10d"), }, } { t.Run(tc.name, func(t *testing.T) { @@ -191,7 +192,7 @@ func TestCorruptedChunk(t *testing.T) { // Check open err. 
b, err := OpenBlock(nil, blockDir, nil) if tc.openErr != nil { - require.Equal(t, tc.openErr.Error(), err.Error()) + require.EqualError(t, err, tc.openErr.Error()) return } defer func() { require.NoError(t, b.Close()) }() @@ -205,7 +206,7 @@ func TestCorruptedChunk(t *testing.T) { require.True(t, set.Next()) it := set.At().Iterator(nil) require.Equal(t, chunkenc.ValNone, it.Next()) - require.Equal(t, tc.iterErr.Error(), it.Err().Error()) + require.EqualError(t, it.Err(), tc.iterErr.Error()) }) } } @@ -310,6 +311,33 @@ func TestLabelValuesWithMatchers(t *testing.T) { } } +func TestBlockQuerierReturnsSortedLabelValues(t *testing.T) { + tmpdir := t.TempDir() + ctx := context.Background() + + var seriesEntries []storage.Series + for i := 100; i > 0; i-- { + seriesEntries = append(seriesEntries, storage.NewListSeries(labels.FromStrings( + "__name__", fmt.Sprintf("value%d", i), + ), []chunks.Sample{sample{100, 0, nil, nil}})) + } + + blockDir := createBlock(t, tmpdir, seriesEntries) + + // Check open err. + block, err := OpenBlock(nil, blockDir, nil) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, block.Close()) }) + + q, err := newBlockBaseQuerier(block, 0, 100) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, q.Close()) }) + + res, _, err := q.LabelValues(ctx, "__name__", nil) + require.NoError(t, err) + require.True(t, slices.IsSorted(res)) +} + // TestBlockSize ensures that the block size is calculated correctly. func TestBlockSize(t *testing.T) { tmpdir := t.TempDir() @@ -344,7 +372,7 @@ func TestBlockSize(t *testing.T) { require.NoError(t, err) require.Equal(t, expAfterDelete, actAfterDelete, "after a delete reported block size doesn't match actual disk size") - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) blockDirsAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil) require.NoError(t, err) @@ -593,13 +621,13 @@ func testPostingsForLabelMatching(t *testing.T, offset storage.SeriesRef, setUp // createBlock creates a block with given set of series and returns its dir. 
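The assertion changes above swap `require.Equal(t, want.Error(), err.Error())` for `require.EqualError(t, err, want.Error())`; besides reading better, EqualError fails cleanly when err is unexpectedly nil instead of panicking on the Error() call. A minimal illustration:

```go
package chunktest

import (
	"errors"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestEqualErrorIdiom(t *testing.T) {
	err := errors.New("checksum mismatch")

	// Old form: panics with a nil pointer dereference if err is nil.
	require.Equal(t, "checksum mismatch", err.Error())

	// New form: reports a normal test failure for a nil err.
	require.EqualError(t, err, "checksum mismatch")
}
```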
func createBlock(tb testing.TB, dir string, series []storage.Series) string { - blockDir, err := CreateBlock(series, dir, 0, log.NewNopLogger()) + blockDir, err := CreateBlock(series, dir, 0, promslog.NewNopLogger()) require.NoError(tb, err) return blockDir } func createBlockFromHead(tb testing.TB, dir string, head *Head) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) @@ -613,7 +641,7 @@ func createBlockFromHead(tb testing.TB, dir string, head *Head) string { } func createBlockFromOOOHead(tb testing.TB, dir string, head *OOOCompactionHead) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 232ec2b914..63f82e28df 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -17,11 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "os" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/prometheus/model/timestamp" @@ -31,7 +30,7 @@ import ( // BlockWriter is a block writer that allows appending and flushing series to disk. type BlockWriter struct { - logger log.Logger + logger *slog.Logger destinationDir string head *Head @@ -50,7 +49,7 @@ var ErrNoSeriesAppended = errors.New("no series appended, aborting") // contains anything at all. It is the caller's responsibility to // ensure that the resulting blocks do not overlap etc. // Writer ensures the block flush is atomic (via rename). -func NewBlockWriter(logger log.Logger, dir string, blockSize int64) (*BlockWriter, error) { +func NewBlockWriter(logger *slog.Logger, dir string, blockSize int64) (*BlockWriter, error) { w := &BlockWriter{ logger: logger, destinationDir: dir, @@ -95,7 +94,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { // Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime). // Because of this block intervals are always +1 than the total samples it includes. 
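The comment ending the hunk above is the reason for the `maxt := w.head.MaxTime() + 1` adjustment that follows: block intervals are half-open, [MinTime, MaxTime), so a sample sitting exactly at the head's maximum timestamp is only covered once MaxTime is one past it:

```go
package main

import "fmt"

// covers reports whether ts lies in the half-open block interval
// [mint, maxt), the TSDB convention.
func covers(mint, maxt, ts int64) bool {
	return ts >= mint && ts < maxt
}

func main() {
	headMax := int64(1000)
	fmt.Println(covers(0, headMax, headMax))   // false: the last sample would be lost
	fmt.Println(covers(0, headMax+1, headMax)) // true: hence the +1 millisecond
}
```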
maxt := w.head.MaxTime() + 1 - level.Info(w.logger).Log("msg", "flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) + w.logger.Info("flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) compactor, err := NewLeveledCompactor(ctx, nil, @@ -121,7 +120,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { func (w *BlockWriter) Close() error { defer func() { if err := os.RemoveAll(w.chunkDir); err != nil { - level.Error(w.logger).Log("msg", "error in deleting BlockWriter files", "err", err) + w.logger.Error("error in deleting BlockWriter files", "err", err) } }() return w.head.Close() diff --git a/tsdb/blockwriter_test.go b/tsdb/blockwriter_test.go index d8240b53c6..4ec25df70a 100644 --- a/tsdb/blockwriter_test.go +++ b/tsdb/blockwriter_test.go @@ -19,9 +19,10 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" ) @@ -29,7 +30,7 @@ import ( func TestBlockWriter(t *testing.T) { ctx := context.Background() outputDir := t.TempDir() - w, err := NewBlockWriter(log.NewNopLogger(), outputDir, DefaultBlockDuration) + w, err := NewBlockWriter(promslog.NewNopLogger(), outputDir, DefaultBlockDuration) require.NoError(t, err) // Add some series. diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index 8cc59f3ea7..6e01798f72 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -86,8 +86,8 @@ func (b *bstream) writeBit(bit bit) { func (b *bstream) writeByte(byt byte) { if b.count == 0 { - b.stream = append(b.stream, 0) - b.count = 8 + b.stream = append(b.stream, byt) + return } i := len(b.stream) - 1 @@ -95,10 +95,8 @@ // Complete the last byte with the leftmost b.count bits from byt. b.stream[i] |= byt >> (8 - b.count) - b.stream = append(b.stream, 0) - i++ // Write the remainder, if any. - b.stream[i] = byt << b.count + b.stream = append(b.stream, byt<<b.count) } - if j > 250 { - break - } a.Append(p.t, p.v) - i++ - j++ } - chunks = append(chunks, c) } - - fmt.Println("num", b.N, "created chunks", len(chunks)) } diff --git a/tsdb/chunkenc/histogram_meta_test.go b/tsdb/chunkenc/histogram_meta_test.go index fdbd1825aa..1774dee867 100644 --- a/tsdb/chunkenc/histogram_meta_test.go +++ b/tsdb/chunkenc/histogram_meta_test.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped read-only byte slices. package chunkenc diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 3177762f81..ac75a5994b 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped read-only byte slices.
// Copyright (c) 2015,2016 Damian Gryski @@ -191,8 +191,8 @@ func (a *xorAppender) Append(t int64, v float64) { case dod == 0: a.b.writeBit(zero) case bitRange(dod, 14): - a.b.writeBits(0b10, 2) - a.b.writeBits(uint64(dod), 14) + a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. case bitRange(dod, 17): a.b.writeBits(0b110, 3) a.b.writeBits(uint64(dod), 17) diff --git a/tsdb/chunks/chunk_write_queue.go b/tsdb/chunks/chunk_write_queue.go index 6d2dc743b0..ba9730d936 100644 --- a/tsdb/chunks/chunk_write_queue.go +++ b/tsdb/chunks/chunk_write_queue.go @@ -24,7 +24,7 @@ import ( ) const ( - // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkrefMap to shrink it again. + // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkRefMap to shrink it again. chunkRefMapShrinkThreshold = 1000 // Minimum interval between shrinking of chunkWriteQueue.chunkRefMap. diff --git a/tsdb/chunks/samples.go b/tsdb/chunks/samples.go index 638660c70c..a5b16094df 100644 --- a/tsdb/chunks/samples.go +++ b/tsdb/chunks/samples.go @@ -29,6 +29,7 @@ type Sample interface { H() *histogram.Histogram FH() *histogram.FloatHistogram Type() chunkenc.ValueType + Copy() Sample // Returns a deep copy. } type SampleSlice []Sample @@ -70,6 +71,17 @@ func (s sample) Type() chunkenc.ValueType { } } +func (s sample) Copy() Sample { + c := sample{t: s.t, f: s.f} + if s.h != nil { + c.h = s.h.Copy() + } + if s.fh != nil { + c.fh = s.fh.Copy() + } + return c +} + // GenerateSamples starting at start and counting up numSamples. func GenerateSamples(start, numSamples int) []Sample { return generateSamples(start, numSamples, func(i int) Sample { diff --git a/tsdb/compact.go b/tsdb/compact.go index 9ef42b339b..ff35679e3f 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -19,15 +19,15 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -80,7 +80,7 @@ type Compactor interface { // LeveledCompactor implements the Compactor interface. 
type LeveledCompactor struct { metrics *CompactorMetrics - logger log.Logger + logger *slog.Logger ranges []int64 chunkPool chunkenc.Pool ctx context.Context @@ -167,7 +167,7 @@ type LeveledCompactorOptions struct { EnableOverlappingCompaction bool } -func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: maxBlockChunkSegmentSize, MergeFunc: mergeFunc, @@ -175,14 +175,14 @@ func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Register }) } -func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MergeFunc: mergeFunc, EnableOverlappingCompaction: true, }) } -func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { +func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { if len(ranges) == 0 { return nil, fmt.Errorf("at least one range must be provided") } @@ -190,7 +190,7 @@ func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer pool = chunkenc.NewPool() } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } mergeFunc := opts.MergeFunc if mergeFunc == nil { @@ -500,15 +500,15 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, b.meta.Compaction.Deletable = true n, err := writeMetaFile(c.logger, b.dir, &b.meta) if err != nil { - level.Error(c.logger).Log( - "msg", "Failed to write 'Deletable' to meta file after compaction", + c.logger.Error( + "Failed to write 'Deletable' to meta file after compaction", "ulid", b.meta.ULID, ) } b.numBytesMeta = n } - level.Info(c.logger).Log( - "msg", "compact blocks resulted in empty block", + c.logger.Info( + "compact blocks resulted in empty block", "count", len(blocks), "sources", fmt.Sprintf("%v", uids), "duration", time.Since(start), @@ -516,8 +516,8 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, return nil, nil } - level.Info(c.logger).Log( - "msg", "compact blocks", + c.logger.Info( + "compact blocks", "count", len(blocks), "mint", meta.MinTime, "maxt", meta.MaxTime, @@ -568,8 +568,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b } if meta.Stats.NumSamples == 0 { - level.Info(c.logger).Log( - "msg", "write block resulted in empty block", + c.logger.Info( + "write block resulted in empty block", "mint", meta.MinTime, "maxt", meta.MaxTime, "duration", 
time.Since(start), @@ -577,8 +577,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b return nil, nil } - level.Info(c.logger).Log( - "msg", "write block", + c.logger.Info( + "write block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, @@ -617,7 +617,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl // RemoveAll returns no error when tmp doesn't exist so it is safe to always run it. if err := os.RemoveAll(tmp); err != nil { - level.Error(c.logger).Log("msg", "removed tmp folder after failed compaction", "err", err.Error()) + c.logger.Error("removed tmp folder after failed compaction", "err", err.Error()) } c.metrics.Ran.Inc() c.metrics.Duration.Observe(time.Since(t).Seconds()) @@ -722,7 +722,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl } type BlockPopulator interface { - PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) error + PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) error } // IndexReaderPostingsFunc is a function to get a sorted posting iterator from a given index reader. @@ -743,7 +743,7 @@ type DefaultBlockPopulator struct{} // PopulateBlock fills the index and chunk writers with new data gathered as the union // of the provided blocks. It returns meta information for the new block. // It expects sorted blocks input by mint. 
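Since PopulateBlock documents that it expects its input blocks sorted by mint, a caller would order the readers up front. A minimal sketch under that assumption (helper name is mine; BlockReader.Meta() is the accessor used in this hunk, and "cmp" and "slices" come from the standard library):

func sortBlocksByMinT(blocks []BlockReader) {
	// Order readers by their meta mint, the precondition stated below.
	slices.SortFunc(blocks, func(a, b BlockReader) int {
		return cmp.Compare(a.Meta().MinTime, b.Meta().MinTime)
	})
}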
-func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) (err error) { +func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) (err error) { if len(blocks) == 0 { return errors.New("cannot populate block from no readers") } @@ -776,7 +776,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa if i > 0 && b.Meta().MinTime < globalMaxt { metrics.OverlappingBlocks.Inc() overlapping = true - level.Info(logger).Log("msg", "Found overlapping blocks during compaction", "ulid", meta.ULID) + logger.Info("Found overlapping blocks during compaction", "ulid", meta.ULID) } if b.Meta().MaxTime > globalMaxt { globalMaxt = b.Meta().MaxTime diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index e7998abf7d..5123d6e624 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -28,9 +28,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -434,7 +434,7 @@ func TestRangeWithFailedCompactionWontGetSelected(t *testing.T) { } func TestCompactionFailWillCleanUpTempDir(t *testing.T) { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{ + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{ 20, 60, 240, @@ -1045,8 +1045,7 @@ func TestCompaction_populateBlock(t *testing.T) { } err = blockPopulator.PopulateBlock(c.ctx, c.metrics, c.logger, c.chunkPool, c.mergeFunc, blocks, meta, iw, nopChunkWriter{}, irPostingsFunc) if tc.expErr != nil { - require.Error(t, err) - require.Equal(t, tc.expErr.Error(), err.Error()) + require.EqualError(t, err, tc.expErr.Error()) return } require.NoError(t, err) @@ -1163,7 +1162,7 @@ func BenchmarkCompaction(b *testing.B) { blockDirs = append(blockDirs, block.Dir()) } - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(b, err) b.ResetTimer() @@ -1319,7 +1318,7 @@ func TestCancelCompactions(t *testing.T) { // Measure the compaction time without interrupting it. var timeCompactionUninterrupted time.Duration { - db, err := open(tmpdir, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1338,7 +1337,7 @@ func TestCancelCompactions(t *testing.T) { } // Measure the compaction time when closing the db in the middle of compaction. 
{ - db, err := open(tmpdirCopy, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdirCopy, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1359,7 +1358,7 @@ func TestCancelCompactions(t *testing.T) { // This checks that the `context.Canceled` error is properly checked at all levels: // - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks. // - callers should check with errors.Is() instead of ==. - readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger()) + readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", promslog.NewNopLogger()) require.NoError(t, err) blocks, err := readOnlyDB.Blocks() require.NoError(t, err) @@ -1371,7 +1370,7 @@ func TestCancelCompactions(t *testing.T) { } // TestDeleteCompactionBlockAfterFailedReload ensures that a failed reloadBlocks immediately after a compaction -// deletes the resulting block to avoid creatings blocks with the same time range. +// deletes the resulting block to avoid creating blocks with the same time range. func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) { tests := map[string]func(*DB) int{ "Test Head Compaction": func(db *DB) int { @@ -1918,7 +1917,7 @@ func TestCompactEmptyResultBlockWithTombstone(t *testing.T) { err = block.Delete(ctx, 0, 10, labels.MustNewMatcher(labels.MatchEqual, defaultLabelName, "0")) require.NoError(t, err) - c, err := NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(ctx, nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) ulids, err := c.Compact(tmpdir, []string{blockDir}, []*Block{block}) @@ -2114,7 +2113,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { t.Parallel() tmpdir := t.TempDir() - // Some blocks that need compation are present. + // Some blocks that need compaction are present. createBlock(t, tmpdir, genSeries(1, 1, 0, 100)) createBlock(t, tmpdir, genSeries(1, 1, 100, 200)) createBlock(t, tmpdir, genSeries(1, 1, 200, 300)) @@ -2122,7 +2121,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { options := DefaultOptions() // This will make the test timeout if compaction really waits for it. options.CompactionDelay = time.Hour - db, err := open(tmpdir, log.NewNopLogger(), nil, options, []int64{10, 200}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, options, []int64{10, 200}, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) diff --git a/tsdb/db.go b/tsdb/db.go index a5b3a5e602..bb9fe6ad7e 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "io/fs" + "log/slog" "math" "math/rand" "os" @@ -29,10 +30,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "go.uber.org/atomic" "golang.org/x/sync/errgroup" @@ -52,6 +52,9 @@ const ( // DefaultBlockDuration in milliseconds. DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond) + // DefaultCompactionDelayMaxPercent in percentage. + DefaultCompactionDelayMaxPercent = 10 + // Block dir suffixes to make deletion and creation operations atomic. 
// We decided to do suffixes instead of creating meta.json as last (or delete as first) one, // because in error case you still can recover meta.json from the block content within local TSDB dir. @@ -86,6 +89,7 @@ func DefaultOptions() *Options { EnableOverlappingCompaction: true, EnableSharding: false, EnableDelayedCompaction: false, + CompactionDelayMaxPercent: DefaultCompactionDelayMaxPercent, CompactionDelay: time.Duration(0), } } @@ -173,6 +177,12 @@ type Options struct { // EnableNativeHistograms enables the ingestion of native histograms. EnableNativeHistograms bool + // EnableOOONativeHistograms enables the ingestion of OOO native histograms. + // It will only take effect if EnableNativeHistograms is set to true and the + // OutOfOrderTimeWindow is > 0. This flag will be removed after testing of + // OOO Native Histogram ingestion is complete. + EnableOOONativeHistograms bool + // OutOfOrderTimeWindow specifies how much out of order is allowed, if any. // This can change during run-time, so this value from here should only be used // while initialising. @@ -198,6 +208,8 @@ type Options struct { // CompactionDelay delays the start time of auto compactions. // It can be increased by up to one minute if the DB does not commit too often. CompactionDelay time.Duration + // CompactionDelayMaxPercent is the upper limit for CompactionDelay, specified as a percentage of the head chunk range. + CompactionDelayMaxPercent int // NewCompactorFunc is a function that returns a TSDB compactor. NewCompactorFunc NewCompactorFunc @@ -209,7 +221,7 @@ type Options struct { BlockChunkQuerierFunc BlockChunkQuerierFunc } -type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) +type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{} @@ -223,7 +235,7 @@ type DB struct { dir string locker *tsdbutil.DirLocker - logger log.Logger + logger *slog.Logger metrics *dbMetrics opts *Options chunkPool chunkenc.Pool @@ -414,7 +426,7 @@ var ErrClosed = errors.New("db already closed") // Current implementation doesn't support concurrency so // all API calls should happen in the same go routine. type DBReadOnly struct { - logger log.Logger + logger *slog.Logger dir string sandboxDir string closers []io.Closer @@ -422,7 +434,7 @@ type DBReadOnly struct { } // OpenDBReadOnly opens DB in the given directory for read only operations. 
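For callers, a minimal read-only usage sketch under the new *slog.Logger signature (paths and error handling are illustrative; promslog.NewNopLogger() stands in for a real logger):

package main

import (
	"fmt"
	"log"

	"github.com/prometheus/common/promslog"
	"github.com/prometheus/prometheus/tsdb"
)

func main() {
	// The second argument is the sandbox dir root; empty lets the DB choose.
	db, err := tsdb.OpenDBReadOnly("data/", "", promslog.NewNopLogger())
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	blocks, err := db.Blocks()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("blocks on disk:", len(blocks))
}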
-func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) { +func OpenDBReadOnly(dir, sandboxDirRoot string, l *slog.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { return nil, fmt.Errorf("opening the db dir: %w", err) } @@ -436,7 +448,7 @@ func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, erro } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &DBReadOnly{ @@ -635,7 +647,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { if len(corrupted) > 0 { for _, b := range loadable { if err := b.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", b) + db.logger.Warn("Closing block failed", "err", err, "block", b) } } errs := tsdb_errors.NewMulti() @@ -667,7 +679,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during opening", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during opening", "detail", overlaps.String()) } // Close all previously open readers and add the new ones to the cache. @@ -745,7 +757,7 @@ func (db *DBReadOnly) Close() error { defer func() { // Delete the temporary sandbox directory that was created when opening the DB. if err := os.RemoveAll(db.sandboxDir); err != nil { - level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err) + db.logger.Error("delete sandbox dir", "err", err) } }() select { @@ -759,7 +771,7 @@ func (db *DBReadOnly) Close() error { } // Open returns a new DB in the given directory. If options are empty, DefaultOptions will be used. -func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { +func Open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { var rngs []int64 opts, rngs = validateOpts(opts, nil) @@ -809,12 +821,12 @@ func validateOpts(opts *Options, rngs []int64) (*Options, []int64) { // open returns a new DB in the given directory. // It initializes the lockfile, WAL, compactor, and Head (by replaying the WAL), and runs the database. // It is not safe to open more than one DB in the same directory. 
-func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { +func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { if err := os.MkdirAll(dir, 0o777); err != nil { return nil, err } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if stats == nil { stats = NewDBStats() @@ -948,6 +960,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs headOpts.MaxExemplars.Store(opts.MaxExemplars) headOpts.EnableMemorySnapshotOnShutdown = opts.EnableMemorySnapshotOnShutdown headOpts.EnableNativeHistograms.Store(opts.EnableNativeHistograms) + headOpts.EnableOOONativeHistograms.Store(opts.EnableOOONativeHistograms) headOpts.OutOfOrderTimeWindow.Store(opts.OutOfOrderTimeWindow) headOpts.OutOfOrderCapMax.Store(opts.OutOfOrderCapMax) headOpts.EnableSharding = opts.EnableSharding @@ -991,17 +1004,17 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs db.head.metrics.walCorruptionsTotal.Inc() var e *errLoadWbl if errors.As(initErr, &e) { - level.Warn(db.logger).Log("msg", "Encountered WBL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WBL read error, attempting repair", "err", initErr) if err := wbl.Repair(e.err); err != nil { return nil, fmt.Errorf("repair corrupted WBL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WBL") + db.logger.Info("Successfully repaired WBL") } else { - level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WAL read error, attempting repair", "err", initErr) if err := wal.Repair(initErr); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WAL") + db.logger.Info("Successfully repaired WAL") } } @@ -1019,7 +1032,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs return db, nil } -func removeBestEffortTmpDirs(l log.Logger, dir string) error { +func removeBestEffortTmpDirs(l *slog.Logger, dir string) error { files, err := os.ReadDir(dir) if os.IsNotExist(err) { return nil @@ -1030,10 +1043,10 @@ func removeBestEffortTmpDirs(l log.Logger, dir string) error { for _, f := range files { if isTmpDir(f) { if err := os.RemoveAll(filepath.Join(dir, f.Name())); err != nil { - level.Error(l).Log("msg", "failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) + l.Error("failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) continue } - level.Info(l).Log("msg", "Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) + l.Info("Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) } } return nil @@ -1071,7 +1084,7 @@ func (db *DB) run(ctx context.Context) { case <-time.After(1 * time.Minute): db.cmtx.Lock() if err := db.reloadBlocks(); err != nil { - level.Error(db.logger).Log("msg", "reloadBlocks", "err", err) + db.logger.Error("reloadBlocks", "err", err) } db.cmtx.Unlock() @@ -1087,7 +1100,7 @@ func (db *DB) run(ctx context.Context) { db.autoCompactMtx.Lock() if db.autoCompact { if err := db.Compact(ctx); err != nil { - level.Error(db.logger).Log("msg", "compaction failed", "err", err) + db.logger.Error("compaction failed", "err", err) backoff = exponential(backoff, 1*time.Second, 1*time.Minute) } else { backoff = 0 @@ -1172,6 +1185,16 @@ 
func (db *DB) DisableNativeHistograms() { db.head.DisableNativeHistograms() } +// EnableOOONativeHistograms enables the ingestion of out-of-order native histograms. +func (db *DB) EnableOOONativeHistograms() { + db.head.EnableOOONativeHistograms() +} + +// DisableOOONativeHistograms disables the ingestion of out-of-order native histograms. +func (db *DB) DisableOOONativeHistograms() { + db.head.DisableOOONativeHistograms() +} + // dbAppender wraps the DB's head appender and triggers compactions on commit // if necessary. type dbAppender struct { @@ -1291,8 +1314,8 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { compactionDuration := time.Since(start) if compactionDuration.Milliseconds() > db.head.chunkRange.Load() { - level.Warn(db.logger).Log( - "msg", "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", + db.logger.Warn( + "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", "duration", compactionDuration.String(), "block_range", db.head.chunkRange.Load(), ) @@ -1416,15 +1439,15 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID } if len(ulids) == 0 { - level.Info(db.logger).Log( - "msg", "compact ooo head resulted in no blocks", + db.logger.Info( + "compact ooo head resulted in no blocks", "duration", time.Since(start), ) return nil, nil } - level.Info(db.logger).Log( - "msg", "out-of-order compaction completed", + db.logger.Info( + "out-of-order compaction completed", "duration", time.Since(start), "ulids", fmt.Sprintf("%v", ulids), ) @@ -1466,7 +1489,7 @@ func (db *DB) compactBlocks() (err error) { // long enough that we end up with a HEAD block that needs to be written. // Check if that's the case and stop compactions early. if db.head.compactable() && !db.waitingForCompactionDelay() { - level.Warn(db.logger).Log("msg", "aborting block compactions to persit the head block") + db.logger.Warn("aborting block compactions to persist the head block") return nil } @@ -1562,7 +1585,7 @@ func (db *DB) reloadBlocks() (err error) { for _, b := range block.Meta().Compaction.Parents { if _, ok := corrupted[b.ULID]; ok { delete(corrupted, b.ULID) - level.Warn(db.logger).Log("msg", "Found corrupted block, but replaced by compacted one so it's safe to delete. This should not happen with atomic deletes.", "block", b.ULID) + db.logger.Warn("Found corrupted block, but replaced by compacted one so it's safe to delete. 
This should not happen with atomic deletes.", "block", b.ULID) } deletable[b.ULID] = nil } @@ -1624,7 +1647,7 @@ func (db *DB) reloadBlocks() (err error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) } } @@ -1640,7 +1663,7 @@ func (db *DB) reloadBlocks() (err error) { return nil } -func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { +func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { return nil, nil, fmt.Errorf("find blocks: %w", err) @@ -1650,7 +1673,7 @@ func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Po for _, bDir := range bDirs { meta, _, err := readMetaFile(bDir) if err != nil { - level.Error(l).Log("msg", "Failed to read meta.json for a block during reloadBlocks. Skipping", "dir", bDir, "err", err) + l.Error("Failed to read meta.json for a block during reloadBlocks. Skipping", "dir", bDir, "err", err) continue } @@ -1767,7 +1790,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { for ulid, block := range blocks { if block != nil { if err := block.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", ulid) + db.logger.Warn("Closing block failed", "err", err, "block", ulid) } } @@ -1788,7 +1811,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { if err := os.RemoveAll(tmpToDelete); err != nil { return fmt.Errorf("delete obsolete block %s: %w", ulid, err) } - level.Info(db.logger).Log("msg", "Deleting obsolete block", "block", ulid) + db.logger.Info("Deleting obsolete block", "block", ulid) } return nil @@ -1956,7 +1979,7 @@ func (db *DB) DisableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = false - level.Info(db.logger).Log("msg", "Compactions disabled") + db.logger.Info("Compactions disabled") } // EnableCompactions enables auto compactions. @@ -1965,12 +1988,11 @@ func (db *DB) EnableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = true - level.Info(db.logger).Log("msg", "Compactions enabled") + db.logger.Info("Compactions enabled") } func (db *DB) generateCompactionDelay() time.Duration { - // Up to 10% of the head's chunkRange. - return time.Duration(rand.Int63n(db.head.chunkRange.Load()/10)) * time.Millisecond + return time.Duration(rand.Int63n(db.head.chunkRange.Load()*int64(db.opts.CompactionDelayMaxPercent)/100)) * time.Millisecond } // ForceHeadMMap is intended for use only in tests and benchmarks. 
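To make the new generateCompactionDelay bound above concrete, a small worked sketch (values assumed: the default 2h head chunk range and the default CompactionDelayMaxPercent of 10, which reproduces the old hard-coded /10 bound):

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	chunkRange := int64(2 * time.Hour / time.Millisecond) // 7,200,000 ms
	maxPercent := int64(10)                               // DefaultCompactionDelayMaxPercent

	upper := chunkRange * maxPercent / 100 // 720,000 ms, i.e. 12 minutes
	delay := time.Duration(rand.Int63n(upper)) * time.Millisecond
	fmt.Printf("delay drawn from [0, 12m): %v\n", delay)
}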
@@ -1995,7 +2017,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { defer db.mtx.RUnlock() for _, b := range db.blocks { - level.Info(db.logger).Log("msg", "Snapshotting block", "block", b) + db.logger.Info("Snapshotting block", "block", b) if err := b.Snapshot(dir); err != nil { return fmt.Errorf("error snapshotting block: %s: %w", b.Dir(), err) @@ -2043,6 +2065,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.Querier + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) var err error @@ -2067,13 +2090,14 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { if err != nil { return nil, fmt.Errorf("open block querier for head while getting new querier %s: %w", rh, err) } + inoMint = newMint } } if overlapsOOO { // We need to fetch from in-order and out-of-order chunks: wrap the headQuerier. isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef) - headQuerier = NewHeadAndOOOQuerier(mint, maxt, db.head, isoState, headQuerier) + headQuerier = NewHeadAndOOOQuerier(inoMint, mint, maxt, db.head, isoState, headQuerier) } if headQuerier != nil { @@ -2119,6 +2143,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.ChunkQuerier + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt) @@ -2142,13 +2167,14 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer if err != nil { return nil, fmt.Errorf("open querier for head while getting new querier %s: %w", rh, err) } + inoMint = newMint } } if overlapsOOO { // We need to fetch from in-order and out-of-order chunks: wrap the headQuerier. 
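// Note on the new inoMint argument threaded through below (restating this patch's intent):
// OOO reads keep the full [mint, maxt] range, while the in-order side starts at
// inoMint = max(db.head.MinTime(), mint), refreshed to the post-compaction mint when the
// head was truncated while the querier was being built above.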
isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef) - headQuerier = NewHeadAndOOOChunkQuerier(mint, maxt, db.head, isoState, headQuerier) + headQuerier = NewHeadAndOOOChunkQuerier(inoMint, mint, maxt, db.head, isoState, headQuerier) } if headQuerier != nil { @@ -2252,7 +2278,7 @@ func (db *DB) CleanTombstones() (err error) { for _, uid := range uids { dir := filepath.Join(db.Dir(), uid.String()) if err := os.RemoveAll(dir); err != nil { - level.Error(db.logger).Log("msg", "failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) + db.logger.Error("failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) } } if err != nil { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 4e3a077f6a..50f50a3a25 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -15,13 +15,17 @@ package tsdb import ( "bufio" + "bytes" "context" "encoding/binary" "flag" "fmt" "hash/crc32" + "log/slog" "math" "math/rand" + "net/http" + "net/http/httptest" "os" "path" "path/filepath" @@ -32,14 +36,20 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "go.uber.org/goleak" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage/remote" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -102,23 +112,9 @@ func query(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[str for ss.Next() { series := ss.At() - samples := []chunks.Sample{} it = series.Iterator(it) - for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() { - switch typ { - case chunkenc.ValFloat: - ts, v := it.At() - samples = append(samples, sample{t: ts, f: v}) - case chunkenc.ValHistogram: - ts, h := it.AtHistogram(nil) - samples = append(samples, sample{t: ts, h: h}) - case chunkenc.ValFloatHistogram: - ts, fh := it.AtFloatHistogram(nil) - samples = append(samples, sample{t: ts, fh: fh}) - default: - t.Fatalf("unknown sample type in query %s", typ.String()) - } - } + samples, err := storage.ExpandSamples(it, newSample) + require.NoError(t, err) require.NoError(t, it.Err()) if len(samples) == 0 { @@ -245,8 +241,8 @@ func TestDataAvailableOnlyAfterCommit(t *testing.T) { func TestNoPanicAfterWALCorruption(t *testing.T) { db := openTestDB(t, &Options{WALSegmentSize: 32 * 1024}, nil) - // Append until the first mmaped head chunk. - // This is to ensure that all samples can be read from the mmaped chunks when the WAL is corrupted. + // Append until the first mmapped head chunk. + // This is to ensure that all samples can be read from the mmapped chunks when the WAL is corrupted. var expSamples []chunks.Sample var maxt int64 ctx := context.Background() @@ -265,7 +261,7 @@ func TestNoPanicAfterWALCorruption(t *testing.T) { // Corrupt the WAL after the first sample of the series so that it has at least one sample and // it is not garbage collected. - // The repair deletes all WAL records after the corrupted record and these are read from the mmaped chunk. + // The repair deletes all WAL records after the corrupted record and these are read from the mmapped chunk. 
+ // The repair deletes all WAL records after the corrupted record and these are read from the mmapped chunk.
{ walFiles, err := os.ReadDir(path.Join(db.Dir(), "wal")) require.NoError(t, err) @@ -1140,7 +1136,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBefore require.NoError(t, db.Close()) // Reopen the DB, replaying the WAL. - reopenDB, err := Open(db.Dir(), log.NewLogfmtLogger(os.Stderr), nil, nil, nil) + reopenDB, err := Open(db.Dir(), promslog.New(&promslog.Config{}), nil, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopenDB.Close()) @@ -1609,7 +1605,7 @@ func TestSizeRetention(t *testing.T) { // Create a WAL checkpoint, and compare sizes. first, last, err := wlog.Segments(db.Head().wal.Dir()) require.NoError(t, err) - _, err = wlog.Checkpoint(log.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) require.NoError(t, err) blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. walSize, err = db.Head().wal.Size() @@ -2350,7 +2346,7 @@ func TestCorrectNumTombstones(t *testing.T) { // This ensures that a snapshot that includes the head and creates a block with a custom time range // will not overlap with the first block created by the next compaction. func TestBlockRanges(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger := promslog.New(&promslog.Config{}) ctx := context.Background() dir := t.TempDir() @@ -2435,7 +2431,7 @@ func TestBlockRanges(t *testing.T) { func TestDBReadOnly(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) expBlocks []*Block expBlock *Block expSeries map[string][]chunks.Sample @@ -2553,7 +2549,7 @@ func TestDBReadOnly(t *testing.T) { // all api methods return an ErrClosed. func TestDBReadOnlyClosing(t *testing.T) { sandboxDir := t.TempDir() - db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) + db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, promslog.New(&promslog.Config{})) require.NoError(t, err) // The sandboxDir was there. require.DirExists(t, db.sandboxDir) @@ -2570,7 +2566,7 @@ func TestDBReadOnlyClosing(t *testing.T) { func TestDBReadOnly_FlushWAL(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) err error maxt int ctx = context.Background() @@ -2650,7 +2646,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) { dBDirHash := dirHash(dir) - // Bootsrap a RO db from the same dir and set up a querier. + // Bootstrap a RO db from the same dir and set up a querier. dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil) require.NoError(t, err) require.Equal(t, chunksCount, countChunks(dir)) @@ -2669,7 +2665,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { require.NoError(t, db.Close()) }() - // Append until the first mmaped head chunk. + // Append until the first mmapped head chunk. 
+ // Append until the first mmapped head chunk.
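// (Why 121 iterations: head chunks are cut after 120 samples by default, so the 121st
// append forces the first chunk to be completed and m-mapped; that default is an
// assumption from the TSDB head code, not something stated in this patch.)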
for i := 0; i < 121; i++ { app := db.Appender(context.Background()) _, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0) @@ -3115,7 +3111,7 @@ func TestCompactHead(t *testing.T) { WALCompression: wlog.CompressionSnappy, } - db, err := Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) ctx := context.Background() app := db.Appender(ctx) @@ -3136,7 +3132,7 @@ func TestCompactHead(t *testing.T) { // Delete everything but the new block and // reopen the db to query it to ensure it includes the head data. require.NoError(t, deleteNonBlocks(db.Dir())) - db, err = Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 1) require.Equal(t, int64(maxt), db.Head().MinTime()) @@ -3163,7 +3159,7 @@ func TestCompactHead(t *testing.T) { // TestCompactHeadWithDeletion tests https://github.com/prometheus/prometheus/issues/11585. func TestCompactHeadWithDeletion(t *testing.T) { - db, err := Open(t.TempDir(), log.NewNopLogger(), prometheus.NewRegistry(), nil, nil) + db, err := Open(t.TempDir(), promslog.NewNopLogger(), prometheus.NewRegistry(), nil, nil) require.NoError(t, err) ctx := context.Background() @@ -3276,7 +3272,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { // Regression test: Already removed parent can be still in list, which was causing Open errors. m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) - _, err = writeMetaFile(log.NewLogfmtLogger(os.Stderr), dir, m) + _, err = writeMetaFile(promslog.New(&promslog.Config{}), dir, m) require.NoError(t, err) } tmpCheckpointDir := path.Join(tmpDir, "wal/checkpoint.00000001.tmp") @@ -3288,7 +3284,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { opts := DefaultOptions() opts.RetentionDuration = 0 - db, err := Open(tmpDir, log.NewLogfmtLogger(os.Stderr), nil, opts, nil) + db, err := Open(tmpDir, promslog.New(&promslog.Config{}), nil, opts, nil) require.NoError(t, err) loadedBlocks := db.Blocks() @@ -3332,7 +3328,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { tmpDir := t.TempDir() ctx := context.Background() - db, err := Open(tmpDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(tmpDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, db.Close()) @@ -3394,7 +3390,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { createBlock(t, db.dir, genSeries(1, 1, newBlockMint, newBlockMaxt)) - db, err = Open(db.dir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(db.dir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) db.DisableCompactions() @@ -3443,7 +3439,7 @@ func TestNoPanicOnTSDBOpenError(t *testing.T) { tmpdir := t.TempDir() // Taking the lock will cause a TSDB startup error. 
- l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) require.NoError(t, l.Lock()) @@ -3996,6 +3992,307 @@ func newTestDB(t *testing.T) *DB { } func TestOOOWALWrite(t *testing.T) { + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + s := labels.NewSymbolTable() + scratchBuilder1 := labels.NewScratchBuilderWithSymbolTable(s, 1) + scratchBuilder1.Add("l", "v1") + s1 := scratchBuilder1.Labels() + scratchBuilder2 := labels.NewScratchBuilderWithSymbolTable(s, 1) + scratchBuilder2.Add("l", "v2") + s2 := scratchBuilder2.Labels() + + scenarios := map[string]struct { + appendSample func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) + expectedOOORecords []interface{} + expectedInORecords []interface{} + }{ + "float": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.Append(0, l, minutes(mins), float64(mins)) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 58}, + }, + []record.RefSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), V: 50}, + }, + + // Single commit but multiple OOO records. + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 107}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 156}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(60), V: 60}, + {Ref: 2, T: minutes(60), V: 60}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + []record.RefSample{ // Contains both in-order and ooo sample. 
+ {Ref: 1, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(65), V: 65}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + }, + }, + "integer histogram": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.AppendHistogram(0, l, minutes(mins), tsdbutil.GenerateTestHistogram(int(mins)), nil) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(40), H: tsdbutil.GenerateTestHistogram(40)}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(42), H: tsdbutil.GenerateTestHistogram(42)}, + }, + + []record.RefHistogramSample{ + {Ref: 2, T: minutes(45), H: tsdbutil.GenerateTestHistogram(45)}, + {Ref: 1, T: minutes(35), H: tsdbutil.GenerateTestHistogram(35)}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(36), H: tsdbutil.GenerateTestHistogram(36)}, + {Ref: 1, T: minutes(37), H: tsdbutil.GenerateTestHistogram(37)}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 89}, + }, + []record.RefHistogramSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + }, + + // Single commit but multiple OOO records. + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 172}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(51), H: tsdbutil.GenerateTestHistogram(51)}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 257}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(52), H: tsdbutil.GenerateTestHistogram(52)}, + {Ref: 2, T: minutes(53), H: tsdbutil.GenerateTestHistogram(53)}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(60), H: tsdbutil.GenerateTestHistogram(60)}, + {Ref: 2, T: minutes(60), H: tsdbutil.GenerateTestHistogram(60)}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(40), H: tsdbutil.GenerateTestHistogram(40)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(42), H: tsdbutil.GenerateTestHistogram(42)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(45), H: tsdbutil.GenerateTestHistogram(45)}, + {Ref: 1, T: minutes(35), H: tsdbutil.GenerateTestHistogram(35)}, + {Ref: 1, T: minutes(36), H: tsdbutil.GenerateTestHistogram(36)}, + {Ref: 1, T: minutes(37), H: tsdbutil.GenerateTestHistogram(37)}, + }, + []record.RefHistogramSample{ // Contains both in-order and ooo sample. 
+ {Ref: 1, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(65), H: tsdbutil.GenerateTestHistogram(65)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(51), H: tsdbutil.GenerateTestHistogram(51)}, + {Ref: 2, T: minutes(52), H: tsdbutil.GenerateTestHistogram(52)}, + {Ref: 2, T: minutes(53), H: tsdbutil.GenerateTestHistogram(53)}, + }, + }, + }, + "float histogram": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.AppendHistogram(0, l, minutes(mins), nil, tsdbutil.GenerateTestFloatHistogram(int(mins))) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(40), FH: tsdbutil.GenerateTestFloatHistogram(40)}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(42), FH: tsdbutil.GenerateTestFloatHistogram(42)}, + }, + + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(45), FH: tsdbutil.GenerateTestFloatHistogram(45)}, + {Ref: 1, T: minutes(35), FH: tsdbutil.GenerateTestFloatHistogram(35)}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(36), FH: tsdbutil.GenerateTestFloatHistogram(36)}, + {Ref: 1, T: minutes(37), FH: tsdbutil.GenerateTestFloatHistogram(37)}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 177}, + }, + []record.RefFloatHistogramSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + }, + + // Single commit but multiple OOO records. + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 348}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(51), FH: tsdbutil.GenerateTestFloatHistogram(51)}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 521}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(52), FH: tsdbutil.GenerateTestFloatHistogram(52)}, + {Ref: 2, T: minutes(53), FH: tsdbutil.GenerateTestFloatHistogram(53)}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(60), FH: tsdbutil.GenerateTestFloatHistogram(60)}, + {Ref: 2, T: minutes(60), FH: tsdbutil.GenerateTestFloatHistogram(60)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(40), FH: tsdbutil.GenerateTestFloatHistogram(40)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(42), FH: tsdbutil.GenerateTestFloatHistogram(42)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(45), FH: tsdbutil.GenerateTestFloatHistogram(45)}, + {Ref: 1, T: minutes(35), FH: tsdbutil.GenerateTestFloatHistogram(35)}, + {Ref: 1, T: minutes(36), FH: tsdbutil.GenerateTestFloatHistogram(36)}, + {Ref: 1, T: minutes(37), FH: tsdbutil.GenerateTestFloatHistogram(37)}, + }, + []record.RefFloatHistogramSample{ // Contains both in-order and ooo sample. 
+ {Ref: 1, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(65), FH: tsdbutil.GenerateTestFloatHistogram(65)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(51), FH: tsdbutil.GenerateTestFloatHistogram(51)}, + {Ref: 2, T: minutes(52), FH: tsdbutil.GenerateTestFloatHistogram(52)}, + {Ref: 2, T: minutes(53), FH: tsdbutil.GenerateTestFloatHistogram(53)}, + }, + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOWALWrite(t, scenario.appendSample, scenario.expectedOOORecords, scenario.expectedInORecords) + }) + } +} + +func testOOOWALWrite(t *testing.T, + appendSample func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error), + expectedOOORecords []interface{}, + expectedInORecords []interface{}, +) { dir := t.TempDir() opts := DefaultOptions() @@ -4004,18 +4301,14 @@ func TestOOOWALWrite(t *testing.T) { db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) s1, s2 := labels.FromStrings("l", "v1"), labels.FromStrings("l", "v2") - minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - - appendSample := func(app storage.Appender, l labels.Labels, mins int64) { - _, err = app.Append(0, l, minutes(mins), float64(mins)) - require.NoError(t, err) - } // Ingest sample at 1h. app := db.Appender(context.Background()) @@ -4055,92 +4348,6 @@ func TestOOOWALWrite(t *testing.T) { appendSample(app, s2, 53) require.NoError(t, app.Commit()) - // The MmapRef in this are not hand calculated, and instead taken from the test run. - // What is important here is the order of records, and that MmapRef increases for each record. - oooRecords := []interface{}{ - []record.RefMmapMarker{ - {Ref: 1}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(40), V: 40}, - }, - - []record.RefMmapMarker{ - {Ref: 2}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(42), V: 42}, - }, - - []record.RefSample{ - {Ref: 2, T: minutes(45), V: 45}, - {Ref: 1, T: minutes(35), V: 35}, - }, - []record.RefMmapMarker{ // 3rd sample, hence m-mapped. - {Ref: 1, MmapRef: 4294967304}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(36), V: 36}, - {Ref: 1, T: minutes(37), V: 37}, - }, - - []record.RefMmapMarker{ // 3rd sample, hence m-mapped. - {Ref: 1, MmapRef: 4294967354}, - }, - []record.RefSample{ // Does not contain the in-order sample here. - {Ref: 1, T: minutes(50), V: 50}, - }, - - // Single commit but multiple OOO records. - []record.RefMmapMarker{ - {Ref: 2, MmapRef: 4294967403}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(51), V: 51}, - }, - []record.RefMmapMarker{ - {Ref: 2, MmapRef: 4294967452}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(52), V: 52}, - {Ref: 2, T: minutes(53), V: 53}, - }, - } - - inOrderRecords := []interface{}{ - []record.RefSeries{ - {Ref: 1, Labels: s1}, - {Ref: 2, Labels: s2}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(60), V: 60}, - {Ref: 2, T: minutes(60), V: 60}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(40), V: 40}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(42), V: 42}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(45), V: 45}, - {Ref: 1, T: minutes(35), V: 35}, - {Ref: 1, T: minutes(36), V: 36}, - {Ref: 1, T: minutes(37), V: 37}, - }, - []record.RefSample{ // Contains both in-order and ooo sample. 
- {Ref: 1, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(65), V: 65}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(51), V: 51}, - {Ref: 2, T: minutes(52), V: 52}, - {Ref: 2, T: minutes(53), V: 53}, - }, - } - getRecords := func(walDir string) []interface{} { sr, err := wlog.NewSegmentsReader(walDir) require.NoError(t, err) @@ -4149,10 +4356,8 @@ func TestOOOWALWrite(t *testing.T) { require.NoError(t, sr.Close()) }() - var ( - records []interface{} - dec record.Decoder = record.NewDecoder(labels.NewSymbolTable()) - ) + var records []interface{} + dec := record.NewDecoder(nil) for r.Next() { rec := r.Record() switch typ := dec.Type(rec); typ { @@ -4168,6 +4373,14 @@ func TestOOOWALWrite(t *testing.T) { markers, err := dec.MmapMarkers(rec, nil) require.NoError(t, err) records = append(records, markers) + case record.HistogramSamples: + histogramSamples, err := dec.HistogramSamples(rec, nil) + require.NoError(t, err) + records = append(records, histogramSamples) + case record.FloatHistogramSamples: + floatHistogramSamples, err := dec.FloatHistogramSamples(rec, nil) + require.NoError(t, err) + records = append(records, floatHistogramSamples) default: t.Fatalf("got a WAL record that is not series or samples: %v", typ) } @@ -4178,11 +4391,11 @@ func TestOOOWALWrite(t *testing.T) { // The normal WAL. actRecs := getRecords(path.Join(dir, "wal")) - testutil.RequireEqual(t, inOrderRecords, actRecs) + require.Equal(t, expectedInORecords, actRecs) // The WBL. actRecs = getRecords(path.Join(dir, wlog.WblDirName)) - testutil.RequireEqual(t, oooRecords, actRecs) + require.Equal(t, expectedOOORecords, actRecs) } // Tests https://github.com/prometheus/prometheus/issues/10291#issuecomment-1044373110. @@ -4381,7 +4594,7 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) { keep := func(id chunks.HeadSeriesRef) bool { return id != 3 } - _, err = wlog.Checkpoint(log.NewNopLogger(), w, first, last-1, keep, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0) require.NoError(t, err) // Confirm there's been a checkpoint. @@ -4495,6 +4708,160 @@ func TestMetadataAssertInMemoryData(t *testing.T) { require.Equal(t, *reopenDB.head.series.getByHash(s4.Hash(), s4).meta, m4) } +// TestMultipleEncodingsCommitOrder mainly serves to demonstrate what happens when committing a batch of samples for the +// same series when there are multiple encodings. Commit() will process all float samples before histogram samples. This +// means that if histograms are appended before floats, the histograms could be marked as OOO when they are committed. +// While possible, this shouldn't happen very often - you need the same series to be ingested as both a float and a +// histogram in a single write request.
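Concretely, the situation described above looks like the following sketch (separate from the test; series name, timestamps, and values are illustrative, and an out-of-order window is assumed to be enabled as in the test):

// demoMixedEncodings appends a histogram and then a later float for the same
// series in one batch. On Commit the float is processed first, so the
// histogram at t=100 is ingested as out-of-order relative to the float at t=110.
func demoMixedEncodings(db *DB) error {
	app := db.Appender(context.Background())
	lbls := labels.FromStrings("foo", "bar1")
	if _, err := app.AppendHistogram(0, lbls, 100, tsdbutil.GenerateTestHistogram(100), nil); err != nil {
		return err
	}
	if _, err := app.Append(0, lbls, 110, 110.0); err != nil {
		return err
	}
	return app.Commit()
}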
+func TestMultipleEncodingsCommitOrder(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() + + series1 := labels.FromStrings("foo", "bar1") + + db := openTestDB(t, opts, nil) + db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + defer func() { + require.NoError(t, db.Close()) + }() + + addSample := func(app storage.Appender, ts int64, valType chunkenc.ValueType) chunks.Sample { + if valType == chunkenc.ValFloat { + _, err := app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + require.NoError(t, err) + return sample{t: ts, f: float64(ts)} + } + if valType == chunkenc.ValHistogram { + h := tsdbutil.GenerateTestHistogram(int(ts)) + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + require.NoError(t, err) + return sample{t: ts, h: h} + } + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + require.NoError(t, err) + return sample{t: ts, fh: fh} + } + + verifySamples := func(minT, maxT int64, expSamples []chunks.Sample, oooCount int) { + requireEqualOOOSamples(t, oooCount, db) + + // Verify samples querier. + querier, err := db.Querier(minT, maxT) + require.NoError(t, err) + defer querier.Close() + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.Len(t, seriesSet, 1) + gotSamples := seriesSet[series1.String()] + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) + + // Verify chunks querier. + chunkQuerier, err := db.ChunkQuerier(minT, maxT) + require.NoError(t, err) + defer chunkQuerier.Close() + + chks := queryChunks(t, chunkQuerier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, chks[series1.String()]) + require.Len(t, chks, 1) + var gotChunkSamples []chunks.Sample + for _, chunk := range chks[series1.String()] { + it := chunk.Chunk.Iterator(nil) + smpls, err := storage.ExpandSamples(it, newSample) + require.NoError(t, err) + gotChunkSamples = append(gotChunkSamples, smpls...) + require.NoError(t, it.Err()) + } + requireEqualSamples(t, series1.String(), expSamples, gotChunkSamples, requireEqualSamplesIgnoreCounterResets) + } + + var expSamples []chunks.Sample + + // Append samples with different encoding types and then commit them at once. + app := db.Appender(context.Background()) + + for i := 100; i < 105; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + // These samples will be marked as OOO as their timestamps are less than the max timestamp for float samples in the + // same batch. + for i := 110; i < 120; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as OOO as their timestamps are less than the max timestamp for float samples in the + // same batch. + for i := 120; i < 130; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as in-order as their timestamps are greater than the max timestamp for float + // samples in the same batch. 
+ for i := 140; i < 150; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as in-order, even though they're appended after the float histograms from ts 140-150 + // because float samples are processed first and these samples are in-order with respect to the float samples in the batch. + for i := 130; i < 135; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + + require.NoError(t, app.Commit()) + + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + // oooCount = 20 because the histograms from 110 - 120 and float histograms from 120 - 130 are detected as OOO. + verifySamples(100, 150, expSamples, 20) + + // Append and commit some in-order histograms by themselves. + app = db.Appender(context.Background()) + for i := 150; i < 160; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + require.NoError(t, app.Commit()) + + // oooCount remains at 20 as no new OOO samples have been added. + verifySamples(100, 160, expSamples, 20) + + // Append and commit samples for all encoding types. This time all samples will be treated as OOO because samples + // with newer timestamps have already been committed. + app = db.Appender(context.Background()) + for i := 50; i < 55; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + for i := 60; i < 70; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + for i := 70; i < 75; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + for i := 80; i < 90; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + require.NoError(t, app.Commit()) + + // Sort samples again because OOO samples have been added. + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + // oooCount = 50 as we've added 30 more OOO samples. + verifySamples(50, 160, expSamples, 50) +} + // TODO(codesome): test more samples incoming once compaction has started. To verify new samples after the start // of compaction are not included in this compaction. @@ -4516,6 +4883,8 @@ func testOOOCompaction(t *testing.T, scenario sampleTypeScenario, addExtraSample opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -4721,6 +5090,8 @@ func testOOOCompactionWithNormalCompaction(t *testing.T, scenario sampleTypeScen db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -4826,10 +5197,14 @@ func testOOOCompactionWithDisabledWriteLog(t *testing.T, scenario sampleTypeScen opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() opts.WALSegmentSize = -1 // disabled WAL and WBL + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) db.DisableCompactions() // We want to manually call it.
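Note that this diff enables native histogram ingestion in two equivalent ways, and both forms appear in the hunks around here; a short sketch contrasting them:

```go
// At open time, via Options (used where the DB is opened with Open()).
opts := DefaultOptions()
opts.EnableNativeHistograms = true
opts.EnableOOONativeHistograms = true
db, err := Open(dir, nil, nil, opts, nil)
require.NoError(t, err)

// Or at runtime, on an already-open DB (used after openTestDB()).
db.EnableNativeHistograms()
db.EnableOOONativeHistograms()
```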
+ db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -4935,6 +5310,8 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa opts.OutOfOrderCapMax = 10 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() opts.EnableMemorySnapshotOnShutdown = true + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -5034,7 +5411,67 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa verifySamples(90, 109) } -func Test_Querier_OOOQuery(t *testing.T) { +func TestQuerierOOOQuery(t *testing.T) { + scenarios := map[string]struct { + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) + sampleFunc func(ts int64) chunks.Sample + }{ + "float": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + return app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + if counterReset { + h.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + if counterReset { + fh.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + "integer histogram counter resets": { + // Adding counter reset to all histograms means each histogram will have its own chunk. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + h.CounterResetHint = histogram.CounterReset // For this scenario, ignore the counterReset argument. 
+ return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + } + + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc) + }) + } +} + +func testQuerierOOOQuery(t *testing.T, + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error), + sampleFunc func(ts int64) chunks.Sample, +) { opts := DefaultOptions() opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() @@ -5044,16 +5481,16 @@ func Test_Querier_OOOQuery(t *testing.T) { defaultFilterFunc := func(t int64) bool { return true } minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc, counterReset bool) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { if !filter(m / time.Minute.Milliseconds()) { continue } - _, err := app.Append(0, series1, m, float64(m)) + _, err := appendFunc(app, m, counterReset) if m >= queryMinT && m <= queryMaxT { - expSamples = append(expSamples, sample{t: m, f: float64(m)}) + expSamples = append(expSamples, sampleFunc(m)) } require.NoError(t, err) totalAppended++ @@ -5064,10 +5501,11 @@ func Test_Querier_OOOQuery(t *testing.T) { } type sampleBatch struct { - minT int64 - maxT int64 - filter filterFunc - isOOO bool + minT int64 + maxT int64 + filter filterFunc + counterReset bool + isOOO bool } tests := []struct { @@ -5115,6 +5553,31 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, }, + { + name: "alternating OOO batches", // In order: 100-200 normal. out of order first pass: 0, 2, 4, ... 98 (no counter reset), second pass: 1, 3, 5, ... 99 (with counter reset). + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: true, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 1 }, + counterReset: true, + isOOO: true, + }, + }, + }, { name: "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5156,7 +5619,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, { - name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 5, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5169,7 +5632,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk.
filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5182,7 +5645,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, { - name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5195,7 +5658,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5213,6 +5676,8 @@ func Test_Querier_OOOQuery(t *testing.T) { opts.OutOfOrderCapMax = tc.oooCap db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() defer func() { require.NoError(t, db.Close()) }() @@ -5221,7 +5686,7 @@ func Test_Querier_OOOQuery(t *testing.T) { var oooSamples, appendedCount int for _, batch := range tc.batches { - expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter) + expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter, batch.counterReset) if batch.isOOO { oooSamples += appendedCount } @@ -5236,15 +5701,107 @@ func Test_Querier_OOOQuery(t *testing.T) { defer querier.Close() seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) - require.NotNil(t, seriesSet[series1.String()]) + gotSamples := seriesSet[series1.String()] + require.NotNil(t, gotSamples) require.Len(t, seriesSet, 1) - require.Equal(t, expSamples, seriesSet[series1.String()]) + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) requireEqualOOOSamples(t, oooSamples, db) }) } } -func Test_ChunkQuerier_OOOQuery(t *testing.T) { +func TestChunkQuerierOOOQuery(t *testing.T) { + nBucketHistogram := func(n int64) *histogram.Histogram { + h := &histogram.Histogram{ + Count: uint64(n), + Sum: float64(n), + } + if n == 0 { + h.PositiveSpans = []histogram.Span{} + h.PositiveBuckets = []int64{} + return h + } + h.PositiveSpans = []histogram.Span{{Offset: 0, Length: uint32(n)}} + h.PositiveBuckets = make([]int64, n) + h.PositiveBuckets[0] = 1 + return h + } + + scenarios := map[string]struct { + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) + sampleFunc func(ts int64) chunks.Sample + checkInUseBucket bool + }{ + "float": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + return app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + if counterReset { + h.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float 
histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + if counterReset { + fh.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + "integer histogram counter resets": { + // Adding counter reset to all histograms means each histogram will have its own chunk. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + h.CounterResetHint = histogram.CounterReset // For this scenario, ignore the counterReset argument. + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "integer histogram with recode": { + // Histograms have increasing number of buckets so their chunks are recoded. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + n := ts / time.Minute.Milliseconds() + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nBucketHistogram(n), nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + n := ts / time.Minute.Milliseconds() + return sample{t: ts, h: nBucketHistogram(n)} + }, + // Only check in-use buckets for this scenario. + // Recoding adds empty buckets. + checkInUseBucket: true, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testChunkQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc, scenario.checkInUseBucket) + }) + } +} + +func testChunkQuerierOOOQuery(t *testing.T, + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error), + sampleFunc func(ts int64) chunks.Sample, + checkInUseBuckets bool, +) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() @@ -5255,16 +5812,16 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { defaultFilterFunc := func(t int64) bool { return true } minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc, counterReset bool) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { if !filter(m / time.Minute.Milliseconds()) { continue } - _, err := app.Append(0, series1, m, float64(m)) + _, err := appendFunc(app, m, counterReset) if m >= queryMinT && m <= queryMaxT { - expSamples = append(expSamples, sample{t: m, f: float64(m)}) + expSamples = append(expSamples, sampleFunc(m)) } require.NoError(t, err) totalAppended++ @@ -5275,10 +5832,11 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { } type sampleBatch struct { - minT int64 - maxT int64 - filter filterFunc - isOOO bool + minT int64 + maxT int64 + filter filterFunc + counterReset bool + isOOO bool } tests := []struct { @@ -5326,6 +5884,31 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }, }, }, + { + name: "alternating OOO batches", // In order: 100-200 normal. 
 out of order first pass: 0, 2, 4, ... 98 (no counter reset), second pass: 1, 3, 5, ... 99 (with counter reset). + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: true, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 1 }, + counterReset: true, + isOOO: true, + }, + }, + }, { name: "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5367,7 +5950,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }, }, { - name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 5, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5380,7 +5963,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5393,7 +5976,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }, }, { - name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5406,7 +5989,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5424,6 +6007,8 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { opts.OutOfOrderCapMax = tc.oooCap db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() defer func() { require.NoError(t, db.Close()) }() @@ -5432,7 +6017,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { var oooSamples, appendedCount int for _, batch := range tc.batches { - expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter) + expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter, batch.counterReset) if batch.isOOO { oooSamples += appendedCount } @@ -5453,12 +6038,204 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { var gotSamples []chunks.Sample for _, chunk := range chks[series1.String()] { it := chunk.Chunk.Iterator(nil) - for it.Next() == chunkenc.ValFloat { - ts, v := it.At() - gotSamples = append(gotSamples, sample{t: ts, f: v}) + smpls, err := storage.ExpandSamples(it, newSample) + require.NoError(t, err) + + // Verify that no sample is outside the chunk's time range.
+ for i, s := range smpls { + switch i { + case 0: + require.Equal(t, chunk.MinTime, s.T(), "first sample %v not at chunk min time %v", s, chunk.MinTime) + case len(smpls) - 1: + require.Equal(t, chunk.MaxTime, s.T(), "last sample %v not at chunk max time %v", s, chunk.MaxTime) + default: + require.GreaterOrEqual(t, s.T(), chunk.MinTime, "sample %v before chunk min time %v", s, chunk.MinTime) + require.LessOrEqual(t, s.T(), chunk.MaxTime, "sample %v after chunk max time %v", s, chunk.MaxTime) + } + } + + gotSamples = append(gotSamples, smpls...) + require.NoError(t, it.Err()) + } + if checkInUseBuckets { + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets, requireEqualSamplesInUseBucketCompare) + } else { + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) + } + }) + } +} + +// TestOOONativeHistogramsWithCounterResets verifies the counter reset headers for in-order and out-of-order samples +// upon ingestion. Note that when the counter reset(s) occur in OOO samples, the header is set to UnknownCounterReset +// rather than CounterReset. This is because with OOO native histogram samples, it cannot be definitely +// determined if a counter reset occurred because the samples are not consecutive, and another sample +// could potentially come in that would change the status of the header. In this case, the UnknownCounterReset +// headers would be re-checked at query time and updated as needed. However, this test is checking the counter +// reset headers at the time of storage. +func TestOOONativeHistogramsWithCounterResets(t *testing.T) { + for name, scenario := range sampleTypeScenarios { + t.Run(name, func(t *testing.T) { + if name == intHistogram || name == floatHistogram { + testOOONativeHistogramsWithCounterResets(t, scenario) + } + }) + } +} + +func testOOONativeHistogramsWithCounterResets(t *testing.T, scenario sampleTypeScenario) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() + + type resetFunc func(v int64) bool + defaultResetFunc := func(v int64) bool { return false } + + lbls := labels.FromStrings("foo", "bar1") + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + type sampleBatch struct { + from int64 + until int64 + shouldReset resetFunc + expCounterResetHints []histogram.CounterResetHint + } + + tests := []struct { + name string + queryMin int64 + queryMax int64 + batches []sampleBatch + expectedSamples []chunks.Sample + }{ + { + name: "Counter reset within in-order samples", + queryMin: minutes(40), + queryMax: minutes(55), + batches: []sampleBatch{ + // In-order samples + { + from: 40, + until: 50, + shouldReset: func(v int64) bool { + return v == 45 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.CounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + }, + }, + { + name: "Counter reset right at beginning of OOO samples", + queryMin: minutes(40), + queryMax: minutes(55), + batches: []sampleBatch{ + // In-order samples + { + from: 40, + until: 45, + shouldReset: defaultResetFunc, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, 
histogram.NotCounterReset}, + }, + { + from: 50, + until: 55, + shouldReset: defaultResetFunc, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + // OOO samples + { + from: 45, + until: 50, + shouldReset: func(v int64) bool { + return v == 45 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + }, + }, + { + name: "Counter resets in both in-order and OOO samples", + queryMin: minutes(40), + queryMax: minutes(55), + batches: []sampleBatch{ + // In-order samples + { + from: 40, + until: 45, + shouldReset: func(v int64) bool { + return v == 44 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset}, + }, + { + from: 50, + until: 55, + shouldReset: defaultResetFunc, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + // OOO samples + { + from: 45, + until: 50, + shouldReset: func(v int64) bool { + return v == 49 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset}, + }, + }, + }, + } + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := openTestDB(t, opts, nil) + db.DisableCompactions() + db.EnableOOONativeHistograms() + defer func() { + require.NoError(t, db.Close()) + }() + + app := db.Appender(context.Background()) + + expSamples := make(map[string][]chunks.Sample) + + for _, batch := range tc.batches { + j := batch.from + smplIdx := 0 + for i := batch.from; i < batch.until; i++ { + resetCount := batch.shouldReset(i) + if resetCount { + j = 0 + } + _, s, err := scenario.appendFunc(app, lbls, minutes(i), j) + require.NoError(t, err) + if s.Type() == chunkenc.ValHistogram { + s.H().CounterResetHint = batch.expCounterResetHints[smplIdx] + } else if s.Type() == chunkenc.ValFloatHistogram { + s.FH().CounterResetHint = batch.expCounterResetHints[smplIdx] + } + expSamples[lbls.String()] = append(expSamples[lbls.String()], s) + j++ + smplIdx++ } } - require.Equal(t, expSamples, gotSamples) + + require.NoError(t, app.Commit()) + + for k, v := range expSamples { + sort.Slice(v, func(i, j int) bool { + return v[i].T() < v[j].T() + }) + expSamples[k] = v + } + + querier, err := db.Querier(tc.queryMin, tc.queryMax) + require.NoError(t, err) + defer querier.Close() + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, seriesSet[lbls.String()]) + require.Len(t, seriesSet, 1) + requireEqualSeries(t, expSamples, seriesSet, false) }) } } @@ -5478,6 +6255,8 @@ func testOOOAppendAndQuery(t *testing.T, scenario sampleTypeScenario) { db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -5555,9 +6334,9 @@ func testOOOAppendAndQuery(t *testing.T, scenario sampleTypeScenario) { addSample(s2, 255, 265, false) verifyOOOMinMaxTimes(250, 265) testQuery(math.MinInt64, 
math.MaxInt64) - testQuery(minutes(250), minutes(265)) // Test querying ono data time range - testQuery(minutes(290), minutes(300)) // Test querying in-order data time range - testQuery(minutes(250), minutes(300)) // Test querying the entire range + testQuery(minutes(250), minutes(265)) // Test querying ooo data time range. + testQuery(minutes(290), minutes(300)) // Test querying in-order data time range. + testQuery(minutes(250), minutes(300)) // Test querying the entire range. // Out of time window. addSample(s1, 59, 59, true) @@ -5609,6 +6388,8 @@ func testOOODisabled(t *testing.T, scenario sampleTypeScenario) { opts.OutOfOrderTimeWindow = 0 db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -5681,6 +6462,8 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 4 * time.Hour.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db := openTestDB(t, opts, nil) db.DisableCompactions() @@ -5830,7 +6613,7 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { resetMmapToOriginal() // We need to reset because new duplicate chunks can be written above. // Removing m-map markers in WBL by rewriting it. - newWbl, err := wlog.New(log.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) + newWbl, err := wlog.New(promslog.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) require.NoError(t, err) sr, err := wlog.NewSegmentsReader(originalWblDir) require.NoError(t, err) @@ -5861,6 +6644,380 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { }) } +func TestOOOHistogramCompactionWithCounterResets(t *testing.T) { + for _, floatHistogram := range []bool{false, true} { + dir := t.TempDir() + ctx := context.Background() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 500 * time.Minute.Milliseconds() + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + series2 := labels.FromStrings("foo", "bar2") + + var series1ExpSamplesPreCompact, series2ExpSamplesPreCompact, series1ExpSamplesPostCompact, series2ExpSamplesPostCompact []chunks.Sample + + addSample := func(ts int64, l labels.Labels, val int, hint histogram.CounterResetHint) sample { + app := db.Appender(context.Background()) + tsMs := ts * time.Minute.Milliseconds() + if floatHistogram { + h := tsdbutil.GenerateTestFloatHistogram(val) + h.CounterResetHint = hint + _, err = app.AppendHistogram(0, l, tsMs, nil, h) + require.NoError(t, err) + require.NoError(t, app.Commit()) + return sample{t: tsMs, fh: h.Copy()} + } + + h := tsdbutil.GenerateTestHistogram(val) + h.CounterResetHint = hint + _, err = app.AppendHistogram(0, l, tsMs, h, nil) + require.NoError(t, err) + require.NoError(t, app.Commit()) + return sample{t: tsMs, h: h.Copy()} + } + + // Add an in-order sample to each series.
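The expectations built below encode a small set of counter-reset-hint rules that the inline comments spell out case by case. For orientation, a simplified editorial sketch of the classification (hypothetical helper; the real detection also compares buckets and schemas):

```go
// counterResetHintFor classifies a histogram sample relative to its
// predecessor: without a usable predecessor from the same chunk nothing can
// be concluded, a count drop signals a reset, anything else is not a reset.
func counterResetHintFor(prev, cur *histogram.Histogram, prevFromSameChunk bool) histogram.CounterResetHint {
	switch {
	case prev == nil || !prevFromSameChunk:
		return histogram.UnknownCounterReset
	case cur.Count < prev.Count:
		return histogram.CounterReset
	default:
		return histogram.NotCounterReset
	}
}
```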
+ s := addSample(520, series1, 1000000, histogram.UnknownCounterReset) + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + + s = addSample(520, series2, 1000000, histogram.UnknownCounterReset) + series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s) + series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s) + + // Verify that the in-memory ooo chunk is empty. + checkEmptyOOOChunk := func(lbls labels.Labels) { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Nil(t, ms.ooo) + } + + checkEmptyOOOChunk(series1) + checkEmptyOOOChunk(series2) + + // Add samples for series1. There are three head chunks that will be created: + // Chunk 1 - Samples between 100 - 440. One explicit counter reset at ts 250. + // Chunk 2 - Samples between 105 - 395. Overlaps with Chunk 1. One detected counter reset at ts 165. + // Chunk 3 - Samples between 480 - 509. All within one block boundary. One detected counter reset at 490. + + // Chunk 1. + // First add 10 samples. + for i := 100; i < 200; i += 10 { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // Before compaction, all the samples have UnknownCounterReset even though they've been added to the same + // chunk. This is because they overlap with the samples from chunk two and when merging two chunks on read, + // the header is set as unknown when the next sample is not in the same chunk as the previous one. + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + // After compaction, samples from multiple mmapped chunks will be merged, so there won't be any overlapping + // chunks. Therefore, most samples will have the NotCounterReset header. + // 100 is the first sample in the first chunk in the blocks, so is still set to UnknownCounterReset. + // 120 is a block boundary - after compaction, 120 will be the first sample in a chunk, so is still set to + // UnknownCounterReset. + if i > 100 && i != 120 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + } + // Explicit counter reset - the counter reset header is set to CounterReset but the value is higher + // than for the previous timestamp. Explicit counter reset headers are actually ignored though, so when reading + // the sample back you actually get unknown/not counter reset. This is as the chainSampleIterator ignores + // existing headers and sets the header as UnknownCounterReset if the next sample is not in the same chunk as + // the previous one, and counter resets always create a new chunk. + // This case has been added to document what's happening, though it might not be the ideal behavior. + s = addSample(250, series1, 100000+250, histogram.CounterReset) + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, copyWithCounterReset(s, histogram.UnknownCounterReset)) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, copyWithCounterReset(s, histogram.NotCounterReset)) + + // Add 19 more samples to complete a chunk. + for i := 260; i < 450; i += 10 { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // The samples with timestamp less than 410 overlap with the samples from chunk 2, so before compaction, + // they're all UnknownCounterReset. 
 Samples greater than or equal to 410 don't overlap with other chunks + // so they're always detected as NotCounterReset pre and post compaction. + if i >= 410 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + // + // 360 is a block boundary, so after compaction its header is still UnknownCounterReset. + if i != 360 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + } + + // Chunk 2. + // Add six OOO samples. + for i := 105; i < 165; i += 10 { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // Samples overlap with chunk 1 so before compaction all headers are UnknownCounterReset. + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, copyWithCounterReset(s, histogram.NotCounterReset)) + } + + // Add sample that will be detected as a counter reset. + s = addSample(165, series1, 100000, histogram.UnknownCounterReset) + // Before compaction, sample has an UnknownCounterReset header due to the chainSampleIterator. + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + // After compaction, the sample's counter reset is properly detected. + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, copyWithCounterReset(s, histogram.CounterReset)) + + // Add 23 more samples to complete a chunk. + for i := 175; i < 405; i += 10 { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // Samples between 205-255 don't overlap with samples from chunk 1, so they're already detected as + // NotCounterReset before compaction. + if i >= 205 && i < 255 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + // 245 is the first sample >= the block boundary at 240, so it's still UnknownCounterReset after compaction. + if i != 245 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } else { + s = copyWithCounterReset(s, histogram.UnknownCounterReset) + } + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + } + + // Chunk 3. + for i := 480; i < 490; i++ { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // No overlapping samples in other chunks, so all other samples will already be detected as NotCounterReset + // before compaction. + if i > 480 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + // 480 is a block boundary. + if i == 480 { + s = copyWithCounterReset(s, histogram.UnknownCounterReset) + } + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + } + // Counter reset. + s = addSample(int64(490), series1, 100000, histogram.UnknownCounterReset) + s = copyWithCounterReset(s, histogram.CounterReset) + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + // Add some more samples after the counter reset.
+ for i := 491; i < 510; i++ { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + s = copyWithCounterReset(s, histogram.NotCounterReset) + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + } + + // Add samples for series2 - one chunk with one detected counter reset at 300. + for i := 200; i < 300; i += 10 { + s = addSample(int64(i), series2, 100000+i, histogram.UnknownCounterReset) + if i > 200 { + s = copyWithCounterReset(s, histogram.NotCounterReset) + } + series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s) + if i == 240 { + s = copyWithCounterReset(s, histogram.UnknownCounterReset) + } + series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s) + } + // Counter reset. + s = addSample(int64(300), series2, 100000, histogram.UnknownCounterReset) + s = copyWithCounterReset(s, histogram.CounterReset) + series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s) + series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s) + // Add some more samples after the counter reset. + for i := 310; i < 500; i += 10 { + s := addSample(int64(i), series2, 100000+i, histogram.UnknownCounterReset) + s = copyWithCounterReset(s, histogram.NotCounterReset) + series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s) + // 360 and 480 are block boundaries. + if i == 360 || i == 480 { + s = copyWithCounterReset(s, histogram.UnknownCounterReset) + } + series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s) + } + + // Sort samples (as OOO samples not added in time-order). + sort.Slice(series1ExpSamplesPreCompact, func(i, j int) bool { + return series1ExpSamplesPreCompact[i].T() < series1ExpSamplesPreCompact[j].T() + }) + sort.Slice(series1ExpSamplesPostCompact, func(i, j int) bool { + return series1ExpSamplesPostCompact[i].T() < series1ExpSamplesPostCompact[j].T() + }) + sort.Slice(series2ExpSamplesPreCompact, func(i, j int) bool { + return series2ExpSamplesPreCompact[i].T() < series2ExpSamplesPreCompact[j].T() + }) + sort.Slice(series2ExpSamplesPostCompact, func(i, j int) bool { + return series2ExpSamplesPostCompact[i].T() < series2ExpSamplesPostCompact[j].T() + }) + + verifyDBSamples := func(s1Samples, s2Samples []chunks.Sample) { + expRes := map[string][]chunks.Sample{ + series1.String(): s1Samples, + series2.String(): s2Samples, + } + + q, err := db.Querier(math.MinInt64, math.MaxInt64) + require.NoError(t, err) + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + requireEqualSeries(t, expRes, actRes, false) + } + + // Verify DB samples before compaction. + verifyDBSamples(series1ExpSamplesPreCompact, series2ExpSamplesPreCompact) + + // Verify that the in-memory ooo chunk is not empty. + checkNonEmptyOOOChunk := func(lbls labels.Labels) { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) + } + + checkNonEmptyOOOChunk(series1) + checkNonEmptyOOOChunk(series2) + + // No blocks before compaction. + require.Empty(t, db.Blocks()) + + // There is a 0th WBL file. 
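Before the WBL checks that follow, it helps to see where the boundary timestamps called out in the comments above (120, 240, 360 and 480 minutes) come from: OOO compaction slices samples at multiples of the block range, and the first sample of each resulting block starts a fresh chunk, whose hint can only be UnknownCounterReset. A small editorial sketch, assuming the default 2h block range:

```go
// blockStartFor returns the start of the block containing tsMs under the
// default 2h block range, so 130min maps to the block starting at 120min,
// 250min to 240min, and so on for 360 and 480.
func blockStartFor(tsMs int64) int64 {
	return (tsMs / DefaultBlockDuration) * DefaultBlockDuration
}
```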
+ require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure wbl is flushed in windows. + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "00000000", files[0].Name()) + f, err := files[0].Info() + require.NoError(t, err) + require.Greater(t, f.Size(), int64(100)) + + // OOO compaction happens here. + require.NoError(t, db.CompactOOOHead(ctx)) + + // Check that blocks are created after compaction. + require.Len(t, db.Blocks(), 5) + + // Check samples after compaction. + verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact) + + // The 0th WBL file will be deleted and the 1st will be the only one present. + files, err = os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "00000001", files[0].Name()) + f, err = files[0].Info() + require.NoError(t, err) + require.Equal(t, int64(0), f.Size()) + + // OOO stuff should not be present in the Head now. + checkEmptyOOOChunk(series1) + checkEmptyOOOChunk(series2) + + verifyBlockSamples := func(block *Block, fromMins, toMins int64) { + var series1Samples, series2Samples []chunks.Sample + + for _, s := range series1ExpSamplesPostCompact { + if s.T() >= fromMins*time.Minute.Milliseconds() { + // Samples should be sorted, so break out of loop when we reach a timestamp that's too big. + if s.T() > toMins*time.Minute.Milliseconds() { + break + } + series1Samples = append(series1Samples, s) + } + } + for _, s := range series2ExpSamplesPostCompact { + if s.T() >= fromMins*time.Minute.Milliseconds() { + // Samples should be sorted, so break out of loop when we reach a timestamp that's too big. + if s.T() > toMins*time.Minute.Milliseconds() { + break + } + series2Samples = append(series2Samples, s) + } + } + + expRes := map[string][]chunks.Sample{} + if len(series1Samples) != 0 { + expRes[series1.String()] = series1Samples + } + if len(series2Samples) != 0 { + expRes[series2.String()] = series2Samples + } + + q, err := NewBlockQuerier(block, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + requireEqualSeries(t, expRes, actRes, false) + } + + // Checking for expected data in the blocks. + verifyBlockSamples(db.Blocks()[0], 100, 119) + verifyBlockSamples(db.Blocks()[1], 120, 239) + verifyBlockSamples(db.Blocks()[2], 240, 359) + verifyBlockSamples(db.Blocks()[3], 360, 479) + verifyBlockSamples(db.Blocks()[4], 480, 509) + + // There should be a single m-map file. + mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + files, err = os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 1) + + // Compact the in-order head and expect another block. + // Since this is a forced compaction, this block is not aligned with 2h. + err = db.CompactHead(NewRangeHead(db.head, 500*time.Minute.Milliseconds(), 550*time.Minute.Milliseconds())) + require.NoError(t, err) + require.Len(t, db.Blocks(), 6) + verifyBlockSamples(db.Blocks()[5], 520, 520) + + // Blocks created out of normal and OOO head now. But not merged. + verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact) + + // The compaction also clears out the old m-map files. Including + // the file that has ooo chunks. + files, err = os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "000001", files[0].Name()) + + // This will merge the overlapping blocks.
+ require.NoError(t, db.Compact(ctx)) + + require.Len(t, db.Blocks(), 5) + verifyBlockSamples(db.Blocks()[0], 100, 119) + verifyBlockSamples(db.Blocks()[1], 120, 239) + verifyBlockSamples(db.Blocks()[2], 240, 359) + verifyBlockSamples(db.Blocks()[3], 360, 479) + verifyBlockSamples(db.Blocks()[4], 480, 520) // Merged block. + + // Final state. Blocks from normal and OOO head are merged. + verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact) + } +} + +func copyWithCounterReset(s sample, hint histogram.CounterResetHint) sample { + if s.h != nil { + h := s.h.Copy() + h.CounterResetHint = hint + return sample{t: s.t, h: h} + } + + h := s.fh.Copy() + h.CounterResetHint = hint + return sample{t: s.t, fh: h} +} + func TestOOOCompactionFailure(t *testing.T) { for name, scenario := range sampleTypeScenarios { t.Run(name, func(t *testing.T) { @@ -5880,6 +7037,8 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) { db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -5907,7 +7066,7 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) { // There is a 0th WBL file. verifyFirstWBLFileIs0 := func(count int) { - require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure wbl is flushed in windows. files, err := os.ReadDir(db.head.wbl.Dir()) require.NoError(t, err) require.Len(t, files, count) @@ -5961,7 +7120,7 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) { require.Len(t, db.Blocks(), 3) require.Equal(t, oldBlocks, db.Blocks()) - // There should be a single m-map file + // There should be a single m-map file. verifyMmapFiles("000001") // All but last WBL file will be deleted. @@ -6057,7 +7216,7 @@ func TestWBLCorruption(t *testing.T) { // should be deleted after replay. // Checking where we corrupt it. - require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure wbl is flushed in windows. files, err := os.ReadDir(db.head.wbl.Dir()) require.NoError(t, err) require.Len(t, files, 2) @@ -6080,7 +7239,7 @@ func TestWBLCorruption(t *testing.T) { addSamples(310, 320, false) // Verifying that we have data after corruption point. - require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure wbl is flushed in windows. 
files, err = os.ReadDir(db.head.wbl.Dir()) require.NoError(t, err) require.Len(t, files, 3) @@ -6167,6 +7326,8 @@ func testOOOMmapCorruption(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderCapMax = 10 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -6300,6 +7461,8 @@ func testOutOfOrderRuntimeConfig(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderTimeWindow = oooTimeWindow + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -6593,6 +7756,8 @@ func testNoGapAfterRestartWithOOO(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderTimeWindow = 30 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -6651,6 +7816,8 @@ func testWblReplayAfterOOODisableAndRestart(t *testing.T, scenario sampleTypeSce opts := DefaultOptions() opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -6718,6 +7885,8 @@ func testPanicOnApplyConfig(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -6775,6 +7944,8 @@ func testDiskFillingUpAfterDisablingOOO(t *testing.T, scenario sampleTypeScenari opts := DefaultOptions() opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -7147,28 +8318,16 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) { createBlock(t, db.Dir(), series) for _, s := range series { - key := s.Labels().String() + lbls := s.Labels().String() + slice := exp[lbls] it = s.Iterator(it) - slice := exp[key] - for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() { - switch typ { - case chunkenc.ValFloat: - ts, v := it.At() - slice = append(slice, sample{t: ts, f: v}) - case chunkenc.ValHistogram: - ts, h := it.AtHistogram(nil) - slice = append(slice, sample{t: ts, h: h}) - case chunkenc.ValFloatHistogram: - ts, h := it.AtFloatHistogram(nil) - slice = append(slice, sample{t: ts, fh: h}) - default: - t.Fatalf("unexpected sample value type %d", typ) - } - } + smpls, err := storage.ExpandSamples(it, nil) + require.NoError(t, err) + slice = append(slice, smpls...) sort.Slice(slice, func(i, j int) bool { return slice[i].T() < slice[j].T() }) - exp[key] = slice + exp[lbls] = slice } } @@ -7201,10 +8360,10 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) { // due to origin from different overlapping chunks anymore. 
for _, ss := range exp { for i, s := range ss[1:] { - if s.H() != nil && ss[i].H() != nil && s.H().CounterResetHint == histogram.UnknownCounterReset { + if s.Type() == chunkenc.ValHistogram && ss[i].Type() == chunkenc.ValHistogram && s.H().CounterResetHint == histogram.UnknownCounterReset { s.H().CounterResetHint = histogram.NotCounterReset } - if s.FH() != nil && ss[i].FH() != nil && s.FH().CounterResetHint == histogram.UnknownCounterReset { + if s.Type() == chunkenc.ValFloatHistogram && ss[i].Type() == chunkenc.ValFloatHistogram && s.FH().CounterResetHint == histogram.UnknownCounterReset { s.FH().CounterResetHint = histogram.NotCounterReset } } @@ -7328,6 +8487,112 @@ func TestNativeHistogramFlag(t *testing.T) { }, act) } +func TestOOONativeHistogramFlag(t *testing.T) { + h := &histogram.Histogram{ + Count: 9, + ZeroCount: 4, + ZeroThreshold: 0.001, + Sum: 35.5, + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 2, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + } + + l := labels.FromStrings("foo", "bar") + + t.Run("Test OOO native histograms if OOO is disabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 0 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Enable Native Histograms and OOO Native Histogram ingestion + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 100, h, nil) + require.NoError(t, err) + + _, err = app.AppendHistogram(0, l, 50, h, nil) + require.NoError(t, err) // The OOO sample is not detected until it is committed, so no error is returned + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.Equal(t, map[string][]chunks.Sample{ + l.String(): {sample{t: 100, h: h}}, + }, act) + }) + t.Run("Test OOO Native Histograms if Native Histograms are disabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Disable Native Histograms and enable OOO Native Histogram ingestion + db.DisableNativeHistograms() + db.EnableOOONativeHistograms() + + // Attempt to add an in-order sample + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 200, h, nil) + require.Equal(t, storage.ErrNativeHistogramsDisabled, err) + + // Attempt to add an OOO sample + _, err = app.AppendHistogram(0, l, 100, h, nil) + require.Equal(t, storage.ErrNativeHistogramsDisabled, err) + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.Equal(t, map[string][]chunks.Sample{}, act) + }) + t.Run("Test OOO native histograms when flag is enabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Enable Native Histograms and OOO Native Histogram ingestion + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + + // Add in-order samples + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 200, h, nil) + require.NoError(t, err) + + // Add OOO samples + _, err = 
app.AppendHistogram(0, l, 100, h, nil) + require.NoError(t, err) + _, err = app.AppendHistogram(0, l, 150, h, nil) + require.NoError(t, err) + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + requireEqualSeries(t, map[string][]chunks.Sample{ + l.String(): {sample{t: 100, h: h}, sample{t: 150, h: h}, sample{t: 200, h: h}}, + }, act, true) + }) +} + // compareSeries essentially replaces `require.Equal(t, expected, actual)` in // situations where the actual series might contain more counter reset hints // "unknown" than the expected series. This can easily happen for long series @@ -7343,29 +8608,47 @@ func compareSeries(t require.TestingT, expected, actual map[string][]chunks.Samp // package. require.Equal(t, expected, actual, "number of series differs") } - for key, eSamples := range expected { - aSamples, ok := actual[key] + for key, expSamples := range expected { + actSamples, ok := actual[key] if !ok { require.Equal(t, expected, actual, "expected series %q not found", key) } - if len(eSamples) != len(aSamples) { - require.Equal(t, eSamples, aSamples, "number of samples for series %q differs", key) + if len(expSamples) != len(actSamples) { + require.Equal(t, expSamples, actSamples, "number of samples for series %q differs", key) } - for i, eS := range eSamples { - aS := aSamples[i] - aH, eH := aS.H(), eS.H() - aFH, eFH := aS.FH(), eS.FH() - switch { - case aH != nil && eH != nil && aH.CounterResetHint == histogram.UnknownCounterReset && eH.CounterResetHint != histogram.GaugeType: - eH = eH.Copy() - eH.CounterResetHint = histogram.UnknownCounterReset - eS = sample{t: eS.T(), h: eH} - case aFH != nil && eFH != nil && aFH.CounterResetHint == histogram.UnknownCounterReset && eFH.CounterResetHint != histogram.GaugeType: - eFH = eFH.Copy() - eFH.CounterResetHint = histogram.UnknownCounterReset - eS = sample{t: eS.T(), fh: eFH} + + for i, eS := range expSamples { + aS := actSamples[i] + + // Must compare via the interface, as require.Equal does not work when the actual types differ; + // not only can the type differ, but the chunk.Sample.FH() interface may auto-convert from chunk.Sample.H()!
+ require.Equal(t, eS.T(), aS.T(), "timestamp of sample %d in series %q differs", i, key) + + require.Equal(t, eS.Type(), aS.Type(), "type of sample %d in series %q differs", i, key) + + switch eS.Type() { + case chunkenc.ValFloat: + require.Equal(t, eS.F(), aS.F(), "sample %d in series %q differs", i, key) + case chunkenc.ValHistogram: + eH, aH := eS.H(), aS.H() + if aH.CounterResetHint == histogram.UnknownCounterReset && eH.CounterResetHint != histogram.GaugeType { + eH = eH.Copy() + // It is always safe to set the counter reset hint to UnknownCounterReset + eH.CounterResetHint = histogram.UnknownCounterReset + eS = sample{t: eS.T(), h: eH} + } + require.Equal(t, eH, aH, "histogram sample %d in series %q differs", i, key) + + case chunkenc.ValFloatHistogram: + eFH, aFH := eS.FH(), aS.FH() + if aFH.CounterResetHint == histogram.UnknownCounterReset && eFH.CounterResetHint != histogram.GaugeType { + eFH = eFH.Copy() + // It is always safe to set the counter reset hint to UnknownCounterReset + eFH.CounterResetHint = histogram.UnknownCounterReset + eS = sample{t: eS.T(), fh: eFH} + } + require.Equal(t, eFH, aFH, "float histogram sample %d in series %q differs", i, key) } - require.Equal(t, eS, aS, "sample %d in series %q differs", i, key) } } } @@ -7468,7 +8751,7 @@ func TestAbortBlockCompactions(t *testing.T) { defer func() { require.NoError(t, db.Close()) }() - // It should NOT be compactible at the beginning of the test + // It should NOT be compactable at the beginning of the test require.False(t, db.head.compactable(), "head should NOT be compactable") // Track the number of compactions run inside db.compactBlocks() @@ -7478,7 +8761,7 @@ func TestAbortBlockCompactions(t *testing.T) { db.compactor = &mockCompactorFn{ planFn: func() ([]string, error) { // On every Plan() run increment compactions. After 4 compactions - // update HEAD to make it compactible to force an exit from db.compactBlocks() loop. + // update HEAD to make it compactable to force an exit from db.compactBlocks() loop. compactions++ if compactions > 3 { chunkRange := db.head.chunkRange.Load() @@ -7507,7 +8790,7 @@ func TestNewCompactorFunc(t *testing.T) { opts := DefaultOptions() block1 := ulid.MustNew(1, nil) block2 := ulid.MustNew(2, nil) - opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) { + opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) { return &mockCompactorFn{ planFn: func() ([]string, error) { return []string{block1.String(), block2.String()}, nil @@ -7613,23 +8896,151 @@ func TestBlockQuerierAndBlockChunkQuerier(t *testing.T) { } func TestGenerateCompactionDelay(t *testing.T) { - assertDelay := func(delay time.Duration) { + assertDelay := func(delay time.Duration, expectedMaxPercentDelay int) { t.Helper() require.GreaterOrEqual(t, delay, time.Duration(0)) - // Less than 10% of the chunkRange. - require.LessOrEqual(t, delay, 6000*time.Millisecond) + // Expect to generate a delay up to MaxPercentDelay of the head chunk range. + require.LessOrEqual(t, delay, (time.Duration(60000*expectedMaxPercentDelay/100) * time.Millisecond)) } opts := DefaultOptions() - opts.EnableDelayedCompaction = true - db := openTestDB(t, opts, []int64{60000}) - defer func() { - require.NoError(t, db.Close()) - }() - // The offset is generated and changed while opening.
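The reworked assertion above only checks an upper bound: the generated delay must stay within expectedMaxPercentDelay of the 60s head chunk range used by the test. A sketch of a generator satisfying that bound (an assumption for illustration; the actual implementation may differ):

```go
// randomCompactionDelay draws a delay uniformly from
// [0, chunkRangeMs*maxPercent/100) milliseconds, e.g. at most 6s for a 60s
// chunk range with maxPercent = 10. Hypothetical helper, not the real code.
func randomCompactionDelay(chunkRangeMs int64, maxPercent int) time.Duration {
	upper := chunkRangeMs * int64(maxPercent) / 100
	return time.Duration(rand.Int63n(upper)) * time.Millisecond
}
```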
- assertDelay(db.opts.CompactionDelay)
+ cases := []struct {
+ compactionDelayPercent int
+ }{
+ {
+ compactionDelayPercent: 1,
+ },
+ {
+ compactionDelayPercent: 10,
+ },
+ {
+ compactionDelayPercent: 60,
+ },
+ {
+ compactionDelayPercent: 100,
+ },
+ }
- for i := 0; i < 1000; i++ {
- assertDelay(db.generateCompactionDelay())
+ opts.EnableDelayedCompaction = true
+
+ for _, c := range cases {
+ opts.CompactionDelayMaxPercent = c.compactionDelayPercent
+ db := openTestDB(t, opts, []int64{60000})
+ defer func() {
+ require.NoError(t, db.Close())
+ }()
+ // The offset is generated and changed while opening.
+ assertDelay(db.opts.CompactionDelay, c.compactionDelayPercent)
+
+ for i := 0; i < 1000; i++ {
+ assertDelay(db.generateCompactionDelay(), c.compactionDelayPercent)
+ }
+ }
+}
+
+type blockedResponseRecorder struct {
+ r *httptest.ResponseRecorder
+
+ // writeBlocked is used to block writing until the test wants it to resume.
+ writeBlocked chan struct{}
+ // writeStarted is closed by blockedResponseRecorder to signal that writing has started.
+ writeStarted chan struct{}
+}
+
+func (br *blockedResponseRecorder) Write(buf []byte) (int, error) {
+ select {
+ case <-br.writeStarted:
+ default:
+ close(br.writeStarted)
+ }
+
+ <-br.writeBlocked
+ return br.r.Write(buf)
+}
+
+func (br *blockedResponseRecorder) Header() http.Header { return br.r.Header() }
+
+func (br *blockedResponseRecorder) WriteHeader(code int) { br.r.WriteHeader(code) }
+
+func (br *blockedResponseRecorder) Flush() { br.r.Flush() }
+
+// TestBlockClosingBlockedDuringRemoteRead ensures that a TSDB Block is not closed while it is being queried
+// through remote read. This is a regression test for https://github.com/prometheus/prometheus/issues/14422.
+// TODO: Ideally, this should reside in storage/remote/read_handler_test.go once the necessary TSDB utils are accessible there.
+func TestBlockClosingBlockedDuringRemoteRead(t *testing.T) {
+ dir := t.TempDir()
+
+ createBlock(t, dir, genSeries(2, 1, 0, 10))
+ db, err := Open(dir, nil, nil, nil, nil)
+ require.NoError(t, err)
+ // No error checking, as manually closing the block later is expected to make this fail.
+ defer db.Close()
+
+ readAPI := remote.NewReadHandler(nil, nil, db, func() config.Config {
+ return config.Config{}
+ },
+ 0, 1, 0,
+ )
+
+ matcher, err := labels.NewMatcher(labels.MatchRegexp, "__name__", ".*")
+ require.NoError(t, err)
+
+ query, err := remote.ToQuery(0, 10, []*labels.Matcher{matcher}, nil)
+ require.NoError(t, err)
+
+ req := &prompb.ReadRequest{
+ Queries: []*prompb.Query{query},
+ AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS},
+ }
+ data, err := proto.Marshal(req)
+ require.NoError(t, err)
+
+ request, err := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(snappy.Encode(nil, data)))
+ require.NoError(t, err)
+
+ blockedRecorder := &blockedResponseRecorder{
+ r: httptest.NewRecorder(),
+ writeBlocked: make(chan struct{}),
+ writeStarted: make(chan struct{}),
+ }
+
+ readDone := make(chan struct{})
+ go func() {
+ readAPI.ServeHTTP(blockedRecorder, request)
+ require.Equal(t, http.StatusOK, blockedRecorder.r.Code)
+ close(readDone)
+ }()
+
+ // Wait for the read API to start streaming data.
+ <-blockedRecorder.writeStarted
+
+ // Try to close the queried block.
+ blockClosed := make(chan struct{})
+ go func() {
+ for _, block := range db.Blocks() {
+ block.Close()
+ }
+ close(blockClosed)
+ }()
+
+ // Closing the queried block should block.
+ // Wait a little bit to make sure of that.
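+ // (Block.Close() waits for the block's pending readers, i.e. open queriers,
+ // so it is expected to hang here for as long as the remote read request
+ // above is still streaming from this block.)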
+ select { + case <-time.After(100 * time.Millisecond): + case <-readDone: + require.Fail(t, "read API should still be streaming data.") + case <-blockClosed: + require.Fail(t, "Block shouldn't get closed while being queried.") + } + + // Resume the read API data streaming. + close(blockedRecorder.writeBlocked) + <-readDone + + // The block should be no longer needed and closing it should end. + select { + case <-time.After(10 * time.Millisecond): + require.Fail(t, "Closing the block timed out.") + case <-blockClosed: } } diff --git a/tsdb/docs/format/chunks.md b/tsdb/docs/format/chunks.md index 8318e0a540..7eb0820e44 100644 --- a/tsdb/docs/format/chunks.md +++ b/tsdb/docs/format/chunks.md @@ -29,14 +29,15 @@ in-file offset (lower 4 bytes) and segment sequence number (upper 4 bytes). # Chunk ``` -┌───────────────┬───────────────────┬──────────────┬────────────────┐ -│ len │ encoding <1 byte> │ data │ CRC32 <4 byte> │ -└───────────────┴───────────────────┴──────────────┴────────────────┘ +┌───────────────┬───────────────────┬─────────────┬────────────────┐ +│ len │ encoding <1 byte> │ data │ CRC32 <4 byte> │ +└───────────────┴───────────────────┴─────────────┴────────────────┘ ``` Notes: * `` has 1 to 10 bytes. -* `encoding`: Currently either `XOR` or `histogram`. +* `encoding`: Currently either `XOR`, `histogram`, or `floathistogram`, see + [code for numerical values](https://github.com/prometheus/prometheus/blob/02d0de9987ad99dee5de21853715954fadb3239f/tsdb/chunkenc/chunk.go#L28-L47). * `data`: See below for each encoding. ## XOR chunk data @@ -67,9 +68,9 @@ Notes: ## Histogram chunk data ``` -┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬────────────────┬──────────────────┐ -│ num_samples │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema │ pos_spans │ neg_spans │ samples │ padding │ -└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴────────────────┴──────────────────┘ +┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬──────────────────────┬────────────────┬──────────────────┐ +│ num_samples │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema │ pos_spans │ neg_spans │ custom_values │ samples │ padding │ +└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴──────────────────────┴────────────────┴──────────────────┘ ``` ### Positive and negative spans data: @@ -80,6 +81,16 @@ Notes: └─────────────────────────┴────────────────────────┴───────────────────────┴────────────────────────┴───────────────────────┴─────┴────────────────────────┴───────────────────────┘ ``` +### Custom values data: + +The `custom_values` data is currently only used for schema -53 (custom bucket boundaries). For other schemas, it is empty (length of zero). + +``` +┌──────────────────────────┬──────────────────┬──────────────────┬─────┬──────────────────┐ +│ num_values │ value_0 │ value_1 │ ... │ value_n │ +└──────────────────────────┴─────────────────────────────────────┴─────┴──────────────────┘ +``` + ### Samples data: ``` @@ -92,7 +103,7 @@ Notes: ├──────────────────────────┤ │ ... 
│
├──────────────────────────┤
-│ Sample_n │
+│ sample_n │
└──────────────────────────┘
```
@@ -107,9 +118,9 @@ Notes:
#### Sample 1 data:
```
-┌────────────────────────┬───────────────────────────┬────────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐
-│ ts_delta │ count_delta │ zero_count_delta │ sum_xor │ pos_bucket_0_delta │ ... │ pos_bucket_n_delta │ neg_bucket_0_delta │ ... │ neg_bucket_n_delta │
-└────────────────────────┴───────────────────────────┴────────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘
+┌───────────────────────┬──────────────────────────┬───────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐
+│ ts_delta │ count_delta │ zero_count_delta │ sum_xor │ pos_bucket_0_delta │ ... │ pos_bucket_n_delta │ neg_bucket_0_delta │ ... │ neg_bucket_n_delta │
+└───────────────────────┴──────────────────────────┴───────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘
```
#### Sample 2 data and following:
@@ -131,7 +142,9 @@ Notes:
* If 0, it is a single zero byte.
* If a power of two between 2^-243 and 2^10, it is a single byte between 1 and 254.
* Otherwise, it is a byte with all bits set (255), followed by a float64, resulting in 9 bytes length.
-* `schema` is a specific value defined by the exposition format. Currently valid values are -4 <= n <= 8.
+* `schema` is a specific value defined by the exposition format. Currently
+ valid values are either -4 <= n <= 8 (standard exponential schemas) or -53
+ (custom bucket boundaries).
* `<varbit_int>` is a variable bitwidth encoding for signed integers, optimized for “delta of deltas” of bucket deltas. It has between 1 bit and 9 bytes. See [code for details](https://github.com/prometheus/prometheus/blob/8c1507ebaa4ca552958ffb60c2d1b21afb7150e4/tsdb/chunkenc/varbit.go#L31-L60).
* `<varbit_uint>` is a variable bitwidth encoding for unsigned integers with the same bit-bucketing as `<varbit_int>`.
@@ -142,3 +155,69 @@ Notes:
* Note that buckets are inherently deltas between the current bucket and the previous bucket. Only `bucket_0` is an absolute count.
* The chunk can have as few as one sample, i.e. sample 1 and following are optional.
* Similarly, there could be down to zero spans and down to zero buckets.
+
+The `<custom>` encoding within the custom values data depends on the schema.
+For schema -53 (custom bucket boundaries, currently the only use case for
+custom values), the values to encode are bucket boundaries in the form of
+floats. The encoding of a given float value _x_ works as follows:
+
+1. Create an intermediate value _y_ = _x_ * 1000.
+2. If 0 ≤ _y_ ≤ 33554430 _and_ if the decimal value of _y_ is an integer, store
+ _y_ + 1 as `<varbit_uint>`.
+3. Otherwise, store a 0 bit, followed by the 64 bits of the original _x_
+ encoded as plain `<float64>`.
+
+Note that values stored as per (2) will always start with a 1 bit, which allows
+decoders to recognize this case, in contrast to values stored as per (3), which
+always start with a 0 bit.
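+
+As a non-normative sketch of the arithmetic involved (the actual `chunkenc`
+code writes varbit/raw bits into a bitstream rather than returning values, and
+the function names below are assumptions for illustration only), the decision
+could look like this in Go:
+
+```go
+package main
+
+import (
+	"fmt"
+	"math"
+)
+
+// encodeCustomValue classifies a bucket boundary according to the scheme
+// above and returns what would be stored.
+func encodeCustomValue(x float64) (stored uint64, compact bool) {
+	y := x * 1000
+	if y >= 0 && y <= 33554430 && y == math.Trunc(y) {
+		return uint64(y) + 1, true // varbit_uint payload; always >= 1, hence a leading 1 bit
+	}
+	return math.Float64bits(x), false // emitted as a 0 bit plus these raw 64 bits
+}
+
+// decodeCustomValue reverses the classification above.
+func decodeCustomValue(stored uint64, compact bool) float64 {
+	if compact {
+		return float64(stored-1) / 1000
+	}
+	return math.Float64frombits(stored)
+}
+
+func main() {
+	for _, x := range []float64{0, 0.25, 10, math.Pi} {
+		s, c := encodeCustomValue(x)
+		fmt.Printf("x=%v compact=%v roundtrip=%v\n", x, c, decodeCustomValue(s, c))
+	}
+}
+```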
+
+The rationale behind this encoding is that most custom bucket boundaries are set
+by humans as decimal numbers with only a few decimal places. In most cases,
+the encoding will therefore result in a short varbit representation. The upper
+bound of 33554430 is picked so that the varbit encoded value will take at most
+4 bytes.
+
+
+## Float histogram chunk data
+
+Float histograms have the same layout as histograms apart from the encoding of samples.
+
+### Samples data:
+
+```
+┌──────────────────────────┐
+│ sample_0 │
+├──────────────────────────┤
+│ sample_1 │
+├──────────────────────────┤
+│ sample_2 │
+├──────────────────────────┤
+│ ... │
+├──────────────────────────┤
+│ sample_n │
+└──────────────────────────┘
+```
+
+#### Sample 0 data:
+
+```
+┌─────────────────┬─────────────────┬──────────────────────┬───────────────┬────────────────────────┬─────┬────────────────────────┬────────────────────────┬─────┬────────────────────────┐
+│ ts │ count │ zero_count │ sum │ pos_bucket_0 │ ... │ pos_bucket_n │ neg_bucket_0 │ ... │ neg_bucket_n │
+└─────────────────┴─────────────────┴──────────────────────┴───────────────┴────────────────────────┴─────┴────────────────────────┴────────────────────────┴─────┴────────────────────────┘
+```
+
+#### Sample 1 data:
+
+```
+┌───────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐
+│ ts_delta │ count_xor │ zero_count_xor │ sum_xor │ pos_bucket_0_xor │ ... │ pos_bucket_n_xor │ neg_bucket_0_xor │ ... │ neg_bucket_n_xor │
+└───────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘
+```
+
+#### Sample 2 data and following:
+
+```
+┌─────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐
+│ ts_dod │ count_xor │ zero_count_xor │ sum_xor │ pos_bucket_0_xor │ ... │ pos_bucket_n_xor │ neg_bucket_0_xor │ ... │ neg_bucket_n_xor │
+└─────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘
+```
diff --git a/tsdb/docs/format/head_chunks.md b/tsdb/docs/format/head_chunks.md
index 5737f42058..7040dcf41a 100644
--- a/tsdb/docs/format/head_chunks.md
+++ b/tsdb/docs/format/head_chunks.md
@@ -37,3 +37,7 @@ is used while replaying the chunks.
| series ref <8 byte> | mint <8 byte, uint64> | maxt <8 byte, uint64> | encoding <1 byte> | len <uvarint> | data <bytes> │ CRC32 <4 byte> │
└─────────────────────┴───────────────────────┴───────────────────────┴───────────────────┴───────────────┴──────────────┴────────────────┘
```
+
+## OOO encoding
+
+Head chunks use the highest bit of the `encoding` field to indicate whether the chunk is out-of-order (1) or not (0). This bit is not set for chunks in the on-disk blocks.
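+
+For illustration, a minimal sketch of how a reader could split the field (the
+mask value and helper name are assumptions, not the actual tsdb API):
+
+```go
+package main
+
+import "fmt"
+
+// oooBit is the highest bit of the 1-byte chunk encoding field.
+const oooBit byte = 0b1000_0000
+
+// decodeEncoding strips the OOO flag and reports whether it was set.
+func decodeEncoding(enc byte) (base byte, isOOO bool) {
+	return enc &^ oooBit, enc&oooBit != 0
+}
+
+func main() {
+	fmt.Println(decodeEncoding(0b1000_0001)) // e.g. an out-of-order chunk with base encoding 1
+}
+```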
diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go index a7ce4e81e0..cc7d0990f6 100644 --- a/tsdb/encoding/encoding.go +++ b/tsdb/encoding/encoding.go @@ -20,7 +20,6 @@ import ( "hash" "hash/crc32" "math" - "unsafe" "github.com/dennwc/varint" ) @@ -75,8 +74,7 @@ func (e *Encbuf) PutVarint64(x int64) { // PutUvarintStr writes a string to the buffer prefixed by its varint length (in bytes!). func (e *Encbuf) PutUvarintStr(s string) { - b := *(*[]byte)(unsafe.Pointer(&s)) - e.PutUvarint(len(b)) + e.PutUvarint(len(s)) e.PutString(s) } diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 7545ab9a60..31d461bed9 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -29,7 +29,7 @@ import ( ) const ( - // Indicates that there is no index entry for an exmplar. + // Indicates that there is no index entry for an exemplar. noExemplar = -1 // Estimated number of exemplars per series, for sizing the index. estimatedExemplarsPerSeries = 16 @@ -152,13 +152,13 @@ func (ce *CircularExemplarStorage) Querier(_ context.Context) (storage.ExemplarQ func (ce *CircularExemplarStorage) Select(start, end int64, matchers ...[]*labels.Matcher) ([]exemplar.QueryResult, error) { ret := make([]exemplar.QueryResult, 0) + ce.lock.RLock() + defer ce.lock.RUnlock() + if len(ce.exemplars) == 0 { return ret, nil } - ce.lock.RLock() - defer ce.lock.RUnlock() - // Loop through each index entry, which will point us to first/last exemplar for each series. for _, idx := range ce.index { var se exemplar.QueryResult @@ -281,13 +281,13 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { l = 0 } + ce.lock.Lock() + defer ce.lock.Unlock() + if l == int64(len(ce.exemplars)) { return 0 } - ce.lock.Lock() - defer ce.lock.Unlock() - oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) @@ -349,6 +349,11 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byt } func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemplar) error { + // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. + // Optimize by moving the lock to be per series (& benchmark it). + ce.lock.Lock() + defer ce.lock.Unlock() + if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -356,11 +361,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp var buf [1024]byte seriesLabels := l.Bytes(buf[:]) - // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. - // Optimize by moving the lock to be per series (& benchmark it). - ce.lock.Lock() - defer ce.lock.Unlock() - idx, ok := ce.index[string(seriesLabels)] err := ce.validateExemplar(idx, e, true) if err != nil { diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 7723ec3894..dbd34cc48c 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -20,6 +20,7 @@ import ( "reflect" "strconv" "strings" + "sync" "testing" "github.com/prometheus/client_golang/prometheus" @@ -499,3 +500,40 @@ func BenchmarkResizeExemplars(b *testing.B) { }) } } + +// TestCircularExemplarStorage_Concurrent_AddExemplar_Resize tries to provoke a data race between AddExemplar and Resize. +// Run with race detection enabled. 
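+// Before the locking changes in exemplar.go above (taking ce.lock before the
+// len(ce.exemplars) check), AddExemplar's unsynchronized read could race with
+// Resize swapping the buffer.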
+func TestCircularExemplarStorage_Concurrent_AddExemplar_Resize(t *testing.T) { + exs, err := NewCircularExemplarStorage(0, eMetrics) + require.NoError(t, err) + es := exs.(*CircularExemplarStorage) + + l := labels.FromStrings("service", "asdf") + e := exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", "qwerty"), + Value: 0.1, + Ts: 1, + } + + var wg sync.WaitGroup + wg.Add(1) + t.Cleanup(wg.Wait) + + started := make(chan struct{}) + + go func() { + defer wg.Done() + + <-started + for i := 0; i < 100; i++ { + require.NoError(t, es.AddExemplar(l, e)) + } + }() + + for i := 0; i < 100; i++ { + es.Resize(int64(i + 1)) + if i == 0 { + close(started) + } + } +} diff --git a/tsdb/head.go b/tsdb/head.go index 4ff7aab632..c67c438e52 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "path/filepath" "runtime" @@ -25,12 +26,11 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "go.uber.org/atomic" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -84,7 +84,7 @@ type Head struct { wal, wbl *wlog.WL exemplarMetrics *ExemplarMetrics exemplars ExemplarStorage - logger log.Logger + logger *slog.Logger appendPool zeropool.Pool[[]record.RefSample] exemplarsPool zeropool.Pool[[]exemplarWithSeriesRef] histogramsPool zeropool.Pool[[]record.RefHistogramSample] @@ -150,9 +150,10 @@ type HeadOptions struct { // EnableNativeHistograms enables the ingestion of native histograms. EnableNativeHistograms atomic.Bool - // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. - // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md - EnableCreatedTimestampZeroIngestion bool + // EnableOOONativeHistograms enables the ingestion of OOO native histograms. + // It will only take effect if EnableNativeHistograms is set to true and the + // OutOfOrderTimeWindow is > 0 + EnableOOONativeHistograms atomic.Bool ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. @@ -222,10 +223,10 @@ type SeriesLifecycleCallback interface { } // NewHead opens the head block in dir. 
-func NewHead(r prometheus.Registerer, l log.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { +func NewHead(r prometheus.Registerer, l *slog.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { var err error if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if opts.OutOfOrderTimeWindow.Load() < 0 { @@ -561,7 +562,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { }, func() float64 { val, err := h.chunkDiskMapper.Size() if err != nil { - level.Error(h.logger).Log("msg", "Failed to calculate size of \"chunks_head\" dir", + h.logger.Error("Failed to calculate size of \"chunks_head\" dir", "err", err.Error()) } return float64(val) @@ -624,7 +625,7 @@ func (h *Head) Init(minValidTime int64) error { } }() - level.Info(h.logger).Log("msg", "Replaying on-disk memory mappable chunks if any") + h.logger.Info("Replaying on-disk memory mappable chunks if any") start := time.Now() snapIdx, snapOffset := -1, 0 @@ -633,7 +634,7 @@ func (h *Head) Init(minValidTime int64) error { snapshotLoaded := false var chunkSnapshotLoadDuration time.Duration if h.opts.EnableMemorySnapshotOnShutdown { - level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot") + h.logger.Info("Chunk snapshot is enabled, replaying from the snapshot") // If there are any WAL files, there should be at least one WAL file with an index that is current or newer // than the snapshot index. If the WAL index is behind the snapshot index somehow, the snapshot is assumed // to be outdated. @@ -646,14 +647,14 @@ func (h *Head) Init(minValidTime int64) error { _, idx, _, err := LastChunkSnapshot(h.opts.ChunkDirRoot) if err != nil && !errors.Is(err, record.ErrNotFound) { - level.Error(h.logger).Log("msg", "Could not find last snapshot", "err", err) + h.logger.Error("Could not find last snapshot", "err", err) } if err == nil && endAt < idx { loadSnapshot = false - level.Warn(h.logger).Log("msg", "Last WAL file is behind snapshot, removing snapshots") + h.logger.Warn("Last WAL file is behind snapshot, removing snapshots") if err := DeleteChunkSnapshots(h.opts.ChunkDirRoot, math.MaxInt, math.MaxInt); err != nil { - level.Error(h.logger).Log("msg", "Error while deleting snapshot directories", "err", err) + h.logger.Error("Error while deleting snapshot directories", "err", err) } } } @@ -663,14 +664,14 @@ func (h *Head) Init(minValidTime int64) error { if err == nil { snapshotLoaded = true chunkSnapshotLoadDuration = time.Since(start) - level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) + h.logger.Info("Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) } if err != nil { snapIdx, snapOffset = -1, 0 refSeries = make(map[chunks.HeadSeriesRef]*memSeries) h.metrics.snapshotReplayErrorTotal.Inc() - level.Error(h.logger).Log("msg", "Failed to load chunk snapshot", "err", err) + h.logger.Error("Failed to load chunk snapshot", "err", err) // We clear the partially loaded data to replay fresh from the WAL. if err := h.resetInMemoryState(); err != nil { return err @@ -694,7 +695,7 @@ func (h *Head) Init(minValidTime int64) error { mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.loadMmappedChunks(refSeries) if err != nil { // TODO(codesome): clear out all m-map chunks here for refSeries. 
- level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) + h.logger.Error("Loading on-disk chunks failed", "err", err) var cerr *chunks.CorruptionErr if errors.As(err, &cerr) { h.metrics.mmapChunkCorruptionTotal.Inc() @@ -711,15 +712,15 @@ func (h *Head) Init(minValidTime int64) error { } } mmapChunkReplayDuration = time.Since(mmapChunkReplayStart) - level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) + h.logger.Info("On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) } if h.wal == nil { - level.Info(h.logger).Log("msg", "WAL not found") + h.logger.Info("WAL not found") return nil } - level.Info(h.logger).Log("msg", "Replaying WAL, this may take a while") + h.logger.Info("Replaying WAL, this may take a while") checkpointReplayStart := time.Now() // Backfill the checkpoint first if it exists. @@ -745,7 +746,7 @@ func (h *Head) Init(minValidTime int64) error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } }() @@ -756,7 +757,7 @@ func (h *Head) Init(minValidTime int64) error { } h.updateWALReplayStatusRead(startFrom) startFrom++ - level.Info(h.logger).Log("msg", "WAL checkpoint loaded") + h.logger.Info("WAL checkpoint loaded") } checkpointReplayDuration := time.Since(checkpointReplayStart) @@ -786,12 +787,12 @@ func (h *Head) Init(minValidTime int64) error { } err = h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WAL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } walReplayDuration := time.Since(walReplayStart) @@ -814,12 +815,12 @@ func (h *Head) Init(minValidTime int64) error { sr := wlog.NewSegmentBufReader(s) err = h.loadWBL(wlog.NewReader(sr), syms, multiRef, lastMmapRef) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wbl segments reader", "err", err) + h.logger.Warn("Error while closing the wbl segments reader", "err", err) } if err != nil { return &errLoadWbl{err} } - level.Info(h.logger).Log("msg", "WBL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WBL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } } @@ -828,8 +829,8 @@ func (h *Head) Init(minValidTime int64) error { totalReplayDuration := time.Since(start) h.metrics.dataTotalReplayDuration.Set(totalReplayDuration.Seconds()) - level.Info(h.logger).Log( - "msg", "WAL replay completed", + h.logger.Info( + "WAL replay completed", "checkpoint_replay_duration", checkpointReplayDuration.String(), "wal_replay_duration", walReplayDuration.String(), "wbl_replay_duration", wblReplayDuration.String(), @@ -939,28 +940,28 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) // removeCorruptedMmappedChunks attempts to delete the corrupted mmapped chunks and if it fails, it clears all the previously // loaded mmapped chunks. 
func (h *Head) removeCorruptedMmappedChunks(err error) (map[chunks.HeadSeriesRef][]*mmappedChunk, map[chunks.HeadSeriesRef][]*mmappedChunk, chunks.ChunkDiskMapperRef, error) { - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") // We never want to preserve the in-memory series from snapshots if we are repairing m-map chunks. if err := h.resetInMemoryState(); err != nil { return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, err } - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") if err := h.chunkDiskMapper.DeleteCorrupted(err); err != nil { - level.Info(h.logger).Log("msg", "Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) + h.logger.Info("Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed", "err", err) } return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, nil } - level.Info(h.logger).Log("msg", "Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") + h.logger.Info("Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") mmappedChunks, oooMmappedChunks, lastRef, err := h.loadMmappedChunks(make(map[chunks.HeadSeriesRef]*memSeries)) if err != nil { - level.Error(h.logger).Log("msg", "Loading on-disk chunks failed, discarding chunk files completely", "err", err) + h.logger.Error("Loading on-disk chunks failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed after failed loading", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed after failed loading", "err", err) } mmappedChunks = map[chunks.HeadSeriesRef][]*mmappedChunk{} } @@ -995,7 +996,7 @@ func (h *Head) ApplyConfig(cfg *config.Config, wbl *wlog.WL) { } migrated := h.exemplars.(*CircularExemplarStorage).Resize(newSize) - level.Info(h.logger).Log("msg", "Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) + h.logger.Info("Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) } // SetOutOfOrderTimeWindow updates the out of order related parameters. @@ -1018,6 +1019,16 @@ func (h *Head) DisableNativeHistograms() { h.opts.EnableNativeHistograms.Store(false) } +// EnableOOONativeHistograms enables the ingestion of out-of-order native histograms. +func (h *Head) EnableOOONativeHistograms() { + h.opts.EnableOOONativeHistograms.Store(true) +} + +// DisableOOONativeHistograms disables the ingestion of out-of-order native histograms. +func (h *Head) DisableOOONativeHistograms() { + h.opts.EnableOOONativeHistograms.Store(false) +} + // PostingsCardinalityStats returns highest cardinality stats by label and value names. func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *index.PostingsStats { cacheKey := statsByLabelName + ";" + strconv.Itoa(limit) @@ -1296,7 +1307,7 @@ func (h *Head) truncateWAL(mint int64) error { // If truncating fails, we'll just try again at the next checkpoint. 
// Leftover segments will just be ignored in the future if there's a checkpoint // that supersedes them. - level.Error(h.logger).Log("msg", "truncating segments failed", "err", err) + h.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it is truncated, so we no @@ -1314,12 +1325,12 @@ func (h *Head) truncateWAL(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher checkpoint exists. - level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err) + h.logger.Error("delete old checkpoints", "err", err) h.metrics.checkpointDeleteFail.Inc() } h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(h.logger).Log("msg", "WAL checkpoint complete", + h.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil @@ -1357,7 +1368,7 @@ func (h *Head) truncateSeriesAndChunkDiskMapper(caller string) error { start := time.Now() headMaxt := h.MaxTime() actualMint, minOOOTime, minMmapFile := h.gc() - level.Info(h.logger).Log("msg", "Head GC completed", "caller", caller, "duration", time.Since(start)) + h.logger.Info("Head GC completed", "caller", caller, "duration", time.Since(start)) h.metrics.gcDuration.Observe(time.Since(start).Seconds()) if actualMint > h.minTime.Load() { @@ -1509,7 +1520,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match series := h.series.getByID(chunks.HeadSeriesRef(p.At())) if series == nil { - level.Debug(h.logger).Log("msg", "Series not found in Head.Delete") + h.logger.Debug("Series not found in Head.Delete") continue } @@ -2066,6 +2077,17 @@ func (s sample) Type() chunkenc.ValueType { } } +func (s sample) Copy() chunks.Sample { + c := sample{t: s.t, f: s.f} + if s.h != nil { + c.h = s.h.Copy() + } + if s.fh != nil { + c.fh = s.fh.Copy() + } + return c +} + // memSeries is the in-memory representation of a series. None of its methods // are goroutine safe and it is the caller's responsibility to lock it. type memSeries struct { @@ -2090,7 +2112,7 @@ type memSeries struct { // before compaction: mmappedChunks=[p5,p6,p7,p8,p9] firstChunkID=5 // after compaction: mmappedChunks=[p7,p8,p9] firstChunkID=7 // - // pN is the pointer to the mmappedChunk referered to by HeadChunkID=N + // pN is the pointer to the mmappedChunk referred to by HeadChunkID=N mmappedChunks []*mmappedChunk // Most recent chunks in memory that are still being built or waiting to be mmapped. 
// This is a linked list, headChunks points to the most recent chunk, headChunks.next points diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 988ce9397e..603b96cfcc 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -17,11 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "math" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -42,6 +40,12 @@ type initAppender struct { var _ storage.GetRef = &initAppender{} +func (a *initAppender) SetOptions(opts *storage.AppendOptions) { + if a.app != nil { + a.app.SetOptions(opts) + } +} + func (a *initAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if a.app != nil { return a.app.Append(ref, lset, t, v) @@ -79,6 +83,16 @@ func (a *initAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t return a.app.AppendHistogram(ref, l, t, h, fh) } +func (a *initAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if a.app != nil { + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + } + a.head.initTime(t) + a.app = a.head.appender() + + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) +} + func (a *initAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { if a.app != nil { return a.app.UpdateMetadata(ref, l, m) @@ -318,11 +332,16 @@ type headAppender struct { appendID, cleanupAppendIDsBelow uint64 closed bool + hints *storage.AppendOptions +} + +func (a *headAppender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts } func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - // For OOO inserts, this restriction is irrelevant and will be checked later once we confirm the sample is an in-order append. - // If OOO inserts are disabled, we may as well as check this as early as we can and avoid more work. + // Fail fast if OOO is disabled and the sample is out of bounds. + // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. if a.oooTimeWindow == 0 && t < a.minValidTime { a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Inc() return 0, storage.ErrOutOfBounds @@ -331,13 +350,17 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { var err error - s, err = a.getOrCreate(lset) + s, _, err = a.getOrCreate(lset) if err != nil { return 0, err } } if value.IsStaleNaN(v) { + // This is not thread safe as we should be holding the lock for "s". + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we could do this conversion + // in commit. This code should move into Commit(). switch { case s.lastHistogramValue != nil: return a.AppendHistogram(ref, lset, t, &histogram.Histogram{Sum: v}, nil) @@ -347,13 +370,18 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 } s.Lock() + + defer s.Unlock() // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise // to skip that sample from the WAL and write only in the WBL. 
- _, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) + isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { + if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + return 0, storage.ErrOutOfOrderSample + } s.pendingCommit = true } - s.Unlock() if delta > 0 { a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } @@ -388,13 +416,13 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 // storage.CreatedTimestampAppender.AppendCTZeroSample for further documentation. func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64) (storage.SeriesRef, error) { if ct >= t { - return 0, fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") + return 0, storage.ErrCTNewerThanSample } s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { var err error - s, err = a.getOrCreate(lset) + s, _, err = a.getOrCreate(lset) if err != nil { return 0, err } @@ -424,20 +452,18 @@ func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Lab return storage.SeriesRef(s.ref), nil } -func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { +func (a *headAppender) getOrCreate(lset labels.Labels) (s *memSeries, created bool, err error) { // Ensure no empty labels have gotten through. lset = lset.WithoutEmpty() if lset.IsEmpty() { - return nil, fmt.Errorf("empty labelset: %w", ErrInvalidSample) + return nil, false, fmt.Errorf("empty labelset: %w", ErrInvalidSample) } if l, dup := lset.HasDuplicateLabelNames(); dup { - return nil, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) + return nil, false, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) } - var created bool - var err error - s, created, err := a.head.getOrCreate(lset.Hash(), lset) + s, created, err = a.head.getOrCreate(lset.Hash(), lset) if err != nil { - return nil, err + return nil, false, err } if created { a.series = append(a.series, record.RefSeries{ @@ -445,12 +471,13 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { Labels: lset, }) } - return s, nil + return s, created, nil } -// appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) -// The sample belongs to the out of order chunk if we return true and no error. -// An error signifies the sample cannot be handled. +// appendable checks whether the given sample is valid for appending to the series. +// If the sample is valid and in-order, it returns false with no error. +// If the sample belongs to the out-of-order chunk, it returns true with no error. +// If the sample cannot be handled, it returns an error. func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { // Check if we can append in the in-order chunk. if t >= minValidTime { @@ -493,46 +520,94 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi return false, headMaxt - t, storage.ErrOutOfOrderSample } -// appendableHistogram checks whether the given histogram is valid for appending to the series. 
-func (s *memSeries) appendableHistogram(t int64, h *histogram.Histogram) error { - if s.headChunks == nil { - return nil +// appendableHistogram checks whether the given histogram sample is valid for appending to the series. (if we return false and no error) +// The sample belongs to the out of order chunk if we return true and no error. +// An error signifies the sample cannot be handled. +func (s *memSeries) appendableHistogram(t int64, h *histogram.Histogram, headMaxt, minValidTime, oooTimeWindow int64, oooHistogramsEnabled bool) (isOOO bool, oooDelta int64, err error) { + // Check if we can append in the in-order chunk. + if t >= minValidTime { + if s.headChunks == nil { + // The series has no sample and was freshly created. + return false, 0, nil + } + msMaxt := s.maxTime() + if t > msMaxt { + return false, 0, nil + } + if t == msMaxt { + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. + // This only checks against the latest in-order sample. + // The OOO headchunk has its own method to detect these duplicates. + if !h.Equals(s.lastHistogramValue) { + return false, 0, storage.ErrDuplicateSampleForTimestamp + } + // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. + return false, 0, nil + } } - if t > s.headChunks.maxTime { - return nil - } - if t < s.headChunks.maxTime { - return storage.ErrOutOfOrderSample + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. + if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { + if !oooHistogramsEnabled { + return true, headMaxt - t, storage.ErrOOONativeHistogramsDisabled + } + return true, headMaxt - t, nil } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if !h.Equals(s.lastHistogramValue) { - return storage.ErrDuplicateSampleForTimestamp + // The sample cannot go in both in-order and out-of-order chunk. + if oooTimeWindow > 0 { + return true, headMaxt - t, storage.ErrTooOldSample } - return nil + if t < minValidTime { + return false, headMaxt - t, storage.ErrOutOfBounds + } + return false, headMaxt - t, storage.ErrOutOfOrderSample } -// appendableFloatHistogram checks whether the given float histogram is valid for appending to the series. -func (s *memSeries) appendableFloatHistogram(t int64, fh *histogram.FloatHistogram) error { - if s.headChunks == nil { - return nil +// appendableFloatHistogram checks whether the given float histogram sample is valid for appending to the series. (if we return false and no error) +// The sample belongs to the out of order chunk if we return true and no error. +// An error signifies the sample cannot be handled. +func (s *memSeries) appendableFloatHistogram(t int64, fh *histogram.FloatHistogram, headMaxt, minValidTime, oooTimeWindow int64, oooHistogramsEnabled bool) (isOOO bool, oooDelta int64, err error) { + // Check if we can append in the in-order chunk. + if t >= minValidTime { + if s.headChunks == nil { + // The series has no sample and was freshly created. + return false, 0, nil + } + msMaxt := s.maxTime() + if t > msMaxt { + return false, 0, nil + } + if t == msMaxt { + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. + // This only checks against the latest in-order sample. 
+ // The OOO headchunk has its own method to detect these duplicates. + if !fh.Equals(s.lastFloatHistogramValue) { + return false, 0, storage.ErrDuplicateSampleForTimestamp + } + // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. + return false, 0, nil + } } - if t > s.headChunks.maxTime { - return nil - } - if t < s.headChunks.maxTime { - return storage.ErrOutOfOrderSample + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. + if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { + if !oooHistogramsEnabled { + return true, headMaxt - t, storage.ErrOOONativeHistogramsDisabled + } + return true, headMaxt - t, nil } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if !fh.Equals(s.lastFloatHistogramValue) { - return storage.ErrDuplicateSampleForTimestamp + // The sample cannot go in both in-order and out-of-order chunk. + if oooTimeWindow > 0 { + return true, headMaxt - t, storage.ErrTooOldSample } - return nil + if t < minValidTime { + return false, headMaxt - t, storage.ErrOutOfBounds + } + return false, headMaxt - t, storage.ErrOutOfOrderSample } // AppendExemplar for headAppender assumes the series ref already exists, and so it doesn't @@ -577,7 +652,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels return 0, storage.ErrNativeHistogramsDisabled } - if t < a.minValidTime { + // Fail fast if OOO is disabled and the sample is out of bounds. + // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. + if (a.oooTimeWindow == 0 || !a.head.opts.EnableOOONativeHistograms.Load()) && t < a.minValidTime { a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() return 0, storage.ErrOutOfBounds } @@ -594,50 +671,48 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } } + var created bool s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { - // Ensure no empty labels have gotten through. - lset = lset.WithoutEmpty() - if lset.IsEmpty() { - return 0, fmt.Errorf("empty labelset: %w", ErrInvalidSample) - } - - if l, dup := lset.HasDuplicateLabelNames(); dup { - return 0, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) - } - - var created bool var err error - s, created, err = a.head.getOrCreate(lset.Hash(), lset) + s, created, err = a.getOrCreate(lset) if err != nil { return 0, err } - if created { - switch { - case h != nil: - s.lastHistogramValue = &histogram.Histogram{} - case fh != nil: - s.lastFloatHistogramValue = &histogram.FloatHistogram{} - } - a.series = append(a.series, record.RefSeries{ - Ref: s.ref, - Labels: lset, - }) - } } switch { case h != nil: s.Lock() - if err := s.appendableHistogram(t, h); err != nil { - s.Unlock() - if errors.Is(err, storage.ErrOutOfOrderSample) { + + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we would not need to do this. + // This whole "if" should be removed. + if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastHistogramValue = &histogram.Histogram{} + } + + // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise + // to skip that sample from the WAL and write only in the WBL. 
+ _, delta, err := s.appendableHistogram(t, h, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err == nil {
+ s.pendingCommit = true
+ }
+ s.Unlock()
+ if delta > 0 {
+ a.head.metrics.oooHistogram.Observe(float64(delta) / 1000)
+ }
+ if err != nil {
+ switch {
+ case errors.Is(err, storage.ErrOutOfOrderSample):
+ fallthrough
+ case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
+ case errors.Is(err, storage.ErrTooOldSample):
+ a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
}
return 0, err
}
- s.pendingCommit = true
- s.Unlock()
a.histograms = append(a.histograms, record.RefHistogramSample{
Ref: s.ref,
T: t,
@@ -646,15 +721,35 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
a.histogramSeries = append(a.histogramSeries, s)
case fh != nil:
s.Lock()
- if err := s.appendableFloatHistogram(t, fh); err != nil {
- s.Unlock()
- if errors.Is(err, storage.ErrOutOfOrderSample) {
+
+ // TODO(krajorama): reorganize Commit() to handle samples in append order
+ // not floats first and then histograms. Then we would not need to do this.
+ // This whole "if" should be removed.
+ if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil {
+ s.lastFloatHistogramValue = &histogram.FloatHistogram{}
+ }
+
+ // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise
+ // to skip that sample from the WAL and write only in the WBL.
+ _, delta, err := s.appendableFloatHistogram(t, fh, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err == nil {
+ s.pendingCommit = true
+ }
+ s.Unlock()
+ if delta > 0 {
+ a.head.metrics.oooHistogram.Observe(float64(delta) / 1000)
+ }
+ if err != nil {
+ switch {
+ case errors.Is(err, storage.ErrOutOfOrderSample):
+ fallthrough
+ case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
+ case errors.Is(err, storage.ErrTooOldSample):
+ a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
}
return 0, err
}
- s.pendingCommit = true
- s.Unlock()
a.floatHistograms = append(a.floatHistograms, record.RefFloatHistogramSample{
Ref: s.ref,
T: t,
@@ -673,6 +768,102 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
return storage.SeriesRef(s.ref), nil
}
+func (a *headAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
+ if !a.head.opts.EnableNativeHistograms.Load() {
+ return 0, storage.ErrNativeHistogramsDisabled
+ }
+
+ if ct >= t {
+ return 0, storage.ErrCTNewerThanSample
+ }
+
+ var created bool
+ s := a.head.series.getByID(chunks.HeadSeriesRef(ref))
+ if s == nil {
+ var err error
+ s, created, err = a.getOrCreate(lset)
+ if err != nil {
+ return 0, err
+ }
+ }
+
+ switch {
+ case h != nil:
+ zeroHistogram := &histogram.Histogram{}
+ s.Lock()
+
+ // TODO(krajorama): reorganize Commit() to handle samples in append order
+ // not floats first and then histograms. Then we would not need to do this.
+ // This whole "if" should be removed.
+ if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastHistogramValue = zeroHistogram + } + + // Although we call `appendableHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed. + // We set it to true to make this implementation as close as possible to the float implementation. + isOOO, _, err := s.appendableHistogram(ct, zeroHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true) + if err != nil { + s.Unlock() + if errors.Is(err, storage.ErrOutOfOrderSample) { + return 0, storage.ErrOutOfOrderCT + } + } + // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples. + // This is to prevent the injected zero from being marked as OOO forever. + if isOOO { + s.Unlock() + return 0, storage.ErrOutOfOrderCT + } + s.pendingCommit = true + s.Unlock() + a.histograms = append(a.histograms, record.RefHistogramSample{ + Ref: s.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, s) + case fh != nil: + zeroFloatHistogram := &histogram.FloatHistogram{} + s.Lock() + + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we would not need to do this. + // This whole "if" should be removed. + if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastFloatHistogramValue = zeroFloatHistogram + } + + // Although we call `appendableFloatHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed. + // We set it to true to make this implementation as close as possible to the float implementation. + isOOO, _, err := s.appendableFloatHistogram(ct, zeroFloatHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true) // OOO is not allowed for CTZeroSamples. + if err != nil { + s.Unlock() + if errors.Is(err, storage.ErrOutOfOrderSample) { + return 0, storage.ErrOutOfOrderCT + } + } + // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples. + // This is to prevent the injected zero from being marked as OOO forever. + if isOOO { + s.Unlock() + return 0, storage.ErrOutOfOrderCT + } + s.pendingCommit = true + s.Unlock() + a.floatHistograms = append(a.floatHistograms, record.RefFloatHistogramSample{ + Ref: s.ref, + T: ct, + FH: zeroFloatHistogram, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, s) + } + + if ct > a.maxt { + a.maxt = ct + } + return storage.SeriesRef(s.ref), nil +} + // UpdateMetadata for headAppender assumes the series ref already exists, and so it doesn't // use getOrCreate or make any of the lset sanity checks that Append does. func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels, meta metadata.Metadata) (storage.SeriesRef, error) { @@ -793,6 +984,427 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar { return ret } +type appenderCommitContext struct { + floatsAppended int + histogramsAppended int + // Number of samples out of order but accepted: with ooo enabled and within time window. + oooFloatsAccepted int + oooHistogramAccepted int + // Number of samples rejected due to: out of order but OOO support disabled. + floatOOORejected int + histoOOORejected int + // Number of samples rejected due to: out of order but too old (OOO support enabled, but outside time window). 
+ floatTooOldRejected int + histoTooOldRejected int + // Number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled). + floatOOBRejected int + histoOOBRejected int + inOrderMint int64 + inOrderMaxt int64 + oooMinT int64 + oooMaxT int64 + wblSamples []record.RefSample + wblHistograms []record.RefHistogramSample + wblFloatHistograms []record.RefFloatHistogramSample + oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef + oooMmapMarkersCount int + oooRecords [][]byte + oooCapMax int64 + appendChunkOpts chunkOpts + enc record.Encoder +} + +// commitExemplars adds all exemplars from headAppender to the head's exemplar storage. +func (a *headAppender) commitExemplars() { + // No errors logging to WAL, so pass the exemplars along to the in memory storage. + for _, e := range a.exemplars { + s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) + if s == nil { + // This is very unlikely to happen, but we have seen it in the wild. + // It means that the series was truncated between AppendExemplar and Commit. + // See TestHeadCompactionWhileAppendAndCommitExemplar. + continue + } + // We don't instrument exemplar appends here, all is instrumented by storage. + if err := a.head.exemplars.AddExemplar(s.labels(), e.exemplar); err != nil { + if errors.Is(err, storage.ErrOutOfOrderExemplar) { + continue + } + a.head.logger.Debug("Unknown error while adding exemplar", "err", err) + } + } +} + +func (acc *appenderCommitContext) collectOOORecords(a *headAppender) { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil + acc.oooMmapMarkersCount = 0 + return + } + + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. + // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if acc.oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, acc.oooMmapMarkersCount) + for ref, mmapRefs := range acc.oooMmapMarkers { + for _, mmapRef := range mmapRefs { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + } + r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + if len(acc.wblSamples) > 0 { + r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblHistograms) > 0 { + r := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblFloatHistograms) > 0 { + r := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil +} + +// handleAppendableError processes errors encountered during sample appending and updates +// the provided counters accordingly. +// +// Parameters: +// - err: The error encountered during appending. +// - appended: Pointer to the counter tracking the number of successfully appended samples. +// - oooRejected: Pointer to the counter tracking the number of out-of-order samples rejected. 
+// - oobRejected: Pointer to the counter tracking the number of out-of-bounds samples rejected. +// - tooOldRejected: Pointer to the counter tracking the number of too-old samples rejected. +func handleAppendableError(err error, appended, oooRejected, oobRejected, tooOldRejected *int) { + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): + *appended-- + *oooRejected++ + case errors.Is(err, storage.ErrOutOfBounds): + *appended-- + *oobRejected++ + case errors.Is(err, storage.ErrTooOldSample): + *appended-- + *tooOldRejected++ + default: + *appended-- + } +} + +// commitSamples processes and commits the samples in the headAppender to the series. +// It handles both in-order and out-of-order samples, updating the appenderCommitContext +// with the results of the append operations. +// +// The function iterates over the samples in the headAppender and attempts to append each sample +// to its corresponding series. It handles various error cases such as out-of-order samples, +// out-of-bounds samples, and too-old samples, updating the appenderCommitContext accordingly. +// +// For out-of-order samples, it checks if the sample can be inserted into the series and updates +// the out-of-order mmap markers if necessary. It also updates the write-ahead log (WBL) samples +// and the minimum and maximum timestamps for out-of-order samples. +// +// For in-order samples, it attempts to append the sample to the series and updates the minimum +// and maximum timestamps for in-order samples. +// +// The function also increments the chunk metrics if a new chunk is created and performs cleanup +// operations on the series after appending the samples. +// +// There are also specific functions to commit histograms and float histograms. +func (a *headAppender) commitSamples(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + + oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) + if err != nil { + handleAppendableError(err, &acc.floatsAppended, &acc.floatOOORejected, &acc.floatOOBRejected, &acc.floatTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. 
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblSamples = append(acc.wblSamples, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooFloatsAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
+ acc.floatsAppended--
+ }
+ default:
+ ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ // The sample is an exact duplicate, and should be silently dropped.
+ acc.floatsAppended--
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// For details on the commitHistograms function, see the commitSamples docs.
+func (a *headAppender) commitHistograms(acc *appenderCommitContext) {
+ var ok, chunkCreated bool
+ var series *memSeries
+
+ for i, s := range a.histograms {
+ series = a.histogramSeries[i]
+ series.Lock()
+
+ oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err != nil {
+ handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected)
+ }
+
+ switch {
+ case err != nil:
+ // Do nothing here.
+ case oooSample:
+ // Sample is OOO and OOO handling is enabled
+ // and the delta is within the OOO tolerance.
+ var mmapRefs []chunks.ChunkDiskMapperRef
+ ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
+ if chunkCreated {
+ r, ok := acc.oooMmapMarkers[series.ref]
+ if !ok || r != nil {
+ // !ok means there are no markers collected for these samples yet. So we first flush the samples
+ // before setting this m-map marker.
+
+ // r != nil means we have already m-mapped a chunk for this series in the same Commit().
+ // Hence, before we m-map again, we should add the samples and m-map markers
+ // seen till now to the WBL records.
+ acc.collectOOORecords(a)
+ }
+
+ if acc.oooMmapMarkers == nil {
+ acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef)
+ }
+ if len(mmapRefs) > 0 {
+ acc.oooMmapMarkers[series.ref] = mmapRefs
+ acc.oooMmapMarkersCount += len(mmapRefs)
+ } else {
+ // No chunk was written to disk, so we need to set an initial marker for this series.
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblHistograms = append(acc.wblHistograms, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooHistogramAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
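+ // Note: integer and float histograms share acc.histogramsAppended, which
+ // starts at len(a.histograms)+len(a.floatHistograms), so either kind of
+ // rejected histogram sample is subtracted from the same counter.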
+ acc.histogramsAppended--
+ }
+ default:
+ ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ acc.histogramsAppended--
+ acc.histoOOORejected++
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// For details on the commitFloatHistograms function, see the commitSamples docs.
+func (a *headAppender) commitFloatHistograms(acc *appenderCommitContext) {
+ var ok, chunkCreated bool
+ var series *memSeries
+
+ for i, s := range a.floatHistograms {
+ series = a.floatHistogramSeries[i]
+ series.Lock()
+
+ oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err != nil {
+ handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected)
+ }
+
+ switch {
+ case err != nil:
+ // Do nothing here.
+ case oooSample:
+ // Sample is OOO and OOO handling is enabled
+ // and the delta is within the OOO tolerance.
+ var mmapRefs []chunks.ChunkDiskMapperRef
+ ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
+ if chunkCreated {
+ r, ok := acc.oooMmapMarkers[series.ref]
+ if !ok || r != nil {
+ // !ok means there are no markers collected for these samples yet. So we first flush the samples
+ // before setting this m-map marker.
+
+ // r != nil means we have already m-mapped a chunk for this series in the same Commit().
+ // Hence, before we m-map again, we should add the samples and m-map markers
+ // seen till now to the WBL records.
+ acc.collectOOORecords(a)
+ }
+
+ if acc.oooMmapMarkers == nil {
+ acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef)
+ }
+ if len(mmapRefs) > 0 {
+ acc.oooMmapMarkers[series.ref] = mmapRefs
+ acc.oooMmapMarkersCount += len(mmapRefs)
+ } else {
+ // No chunk was written to disk, so we need to set an initial marker for this series.
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblFloatHistograms = append(acc.wblFloatHistograms, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooHistogramAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
+ acc.histogramsAppended--
+ }
+ default:
+ ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ acc.histogramsAppended--
+ acc.histoOOORejected++
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// commitMetadata commits the metadata for each series in the headAppender.
+// It iterates over the metadata slice and updates the corresponding series +// with the new metadata information. The series is locked during the update +// to ensure thread safety. +func (a *headAppender) commitMetadata() { + var series *memSeries + for i, m := range a.metadata { + series = a.metadataSeries[i] + series.Lock() + series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} + series.Unlock() + } +} + // Commit writes to the WAL and adds the data to the Head. // TODO(codesome): Refactor this method to reduce indentation and make it more readable. func (a *headAppender) Commit() (err error) { @@ -810,23 +1422,7 @@ func (a *headAppender) Commit() (err error) { a.head.writeNotified.Notify() } - // No errors logging to WAL, so pass the exemplars along to the in memory storage. - for _, e := range a.exemplars { - s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) - if s == nil { - // This is very unlikely to happen, but we have seen it in the wild. - // It means that the series was truncated between AppendExemplar and Commit. - // See TestHeadCompactionWhileAppendAndCommitExemplar. - continue - } - // We don't instrument exemplar appends here, all is instrumented by storage. - if err := a.head.exemplars.AddExemplar(s.labels(), e.exemplar); err != nil { - if errors.Is(err, storage.ErrOutOfOrderExemplar) { - continue - } - level.Debug(a.head.logger).Log("msg", "Unknown error while adding exemplar", "err", err) - } - } + a.commitExemplars() defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) @@ -837,254 +1433,58 @@ func (a *headAppender) Commit() (err error) { defer a.head.putMetadataBuffer(a.metadata) defer a.head.iso.closeAppend(a.appendID) - var ( - floatsAppended = len(a.samples) - histogramsAppended = len(a.histograms) + len(a.floatHistograms) - // number of samples out of order but accepted: with ooo enabled and within time window - oooFloatsAccepted int - // number of samples rejected due to: out of order but OOO support disabled. - floatOOORejected int - histoOOORejected int - // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) - floatTooOldRejected int - // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) - floatOOBRejected int - - inOrderMint int64 = math.MaxInt64 - inOrderMaxt int64 = math.MinInt64 - oooMinT int64 = math.MaxInt64 - oooMaxT int64 = math.MinInt64 - wblSamples []record.RefSample - oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef - oooMmapMarkersCount int - oooRecords [][]byte - oooCapMax = a.head.opts.OutOfOrderCapMax.Load() - series *memSeries - appendChunkOpts = chunkOpts{ + acc := &appenderCommitContext{ + floatsAppended: len(a.samples), + histogramsAppended: len(a.histograms) + len(a.floatHistograms), + inOrderMint: math.MaxInt64, + inOrderMaxt: math.MinInt64, + oooMinT: math.MaxInt64, + oooMaxT: math.MinInt64, + oooCapMax: a.head.opts.OutOfOrderCapMax.Load(), + appendChunkOpts: chunkOpts{ chunkDiskMapper: a.head.chunkDiskMapper, chunkRange: a.head.chunkRange.Load(), samplesPerChunk: a.head.opts.SamplesPerChunk, - } - enc record.Encoder - ) + }, + } + defer func() { - for i := range oooRecords { - a.head.putBytesBuffer(oooRecords[i][:0]) + for i := range acc.oooRecords { + a.head.putBytesBuffer(acc.oooRecords[i][:0]) } }() - collectOOORecords := func() { - if a.head.wbl == nil { - // WBL is not enabled. So no need to collect. 
- wblSamples = nil - oooMmapMarkers = nil - oooMmapMarkersCount = 0 - return - } - // The m-map happens before adding a new sample. So we collect - // the m-map markers first, and then samples. - // WBL Graphically: - // WBL Before this Commit(): [old samples before this commit for chunk 1] - // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] - if oooMmapMarkers != nil { - markers := make([]record.RefMmapMarker, 0, oooMmapMarkersCount) - for ref, mmapRefs := range oooMmapMarkers { - for _, mmapRef := range mmapRefs { - markers = append(markers, record.RefMmapMarker{ - Ref: ref, - MmapRef: mmapRef, - }) - } - } - r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblSamples) > 0 { - r := enc.Samples(wblSamples, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } + a.commitSamples(acc) + a.commitHistograms(acc) + a.commitFloatHistograms(acc) + a.commitMetadata() - wblSamples = nil - oooMmapMarkers = nil - } - for i, s := range a.samples { - series = a.sampleSeries[i] - series.Lock() + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.oooHistogramAccepted)) + a.head.updateMinMaxTime(acc.inOrderMint, acc.inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(acc.oooMinT, acc.oooMaxT) - oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - floatsAppended-- - floatOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - floatsAppended-- - floatOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - floatsAppended-- - floatTooOldRejected++ - default: - floatsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != nil means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. 
- collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblSamples = append(wblSamples, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooFloatsAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - floatsAppended-- - } - default: - ok, chunkCreated = series.append(s.T, s.V, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - // The sample is an exact duplicate, and should be silently dropped. - floatsAppended-- - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.histograms { - series = a.histogramSeries[i] - series.Lock() - ok, chunkCreated := series.appendHistogram(s.T, s.H, a.appendID, appendChunkOpts) - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - } - - for i, s := range a.floatHistograms { - series = a.floatHistogramSeries[i] - series.Lock() - ok, chunkCreated := series.appendFloatHistogram(s.T, s.FH, a.appendID, appendChunkOpts) - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - } - - for i, m := range a.metadata { - series = a.metadataSeries[i] - series.Lock() - series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} - series.Unlock() - } - - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatTooOldRejected)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatsAppended)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histogramsAppended)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(oooFloatsAccepted)) - a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) - 
a.head.updateMinOOOMaxOOOTime(oooMinT, oooMaxT) - - collectOOORecords() + acc.collectOOORecords(a) if a.head.wbl != nil { - if err := a.head.wbl.Log(oooRecords...); err != nil { + if err := a.head.wbl.Log(acc.oooRecords...); err != nil { // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging // until we have found what samples become OOO. We can try having a metric for this failure. // Returning the error here is not correct because we have already put the samples into the memory, // hence the append/insert was a success. - level.Error(a.head.logger).Log("msg", "Failed to log out of order samples into the WAL", "err", err) + a.head.logger.Error("Failed to log out of order samples into the WAL", "err", err) } } return nil } // insert is like append, except it inserts. Used for OOO samples. -func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger log.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { +func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { if s.ooo == nil { s.ooo = &memSeriesOOOFields{} } @@ -1446,7 +1846,7 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange // cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk. // The caller must ensure that s is locked and s.ooo is not nil. -func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { +func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger) s.ooo.oooHeadChunk = &oooHeadChunk{ @@ -1459,7 +1859,7 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk } // s must be locked when calling. -func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) []chunks.ChunkDiskMapperRef { +func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) []chunks.ChunkDiskMapperRef { if s.ooo == nil || s.ooo.oooHeadChunk == nil { // OOO is not enabled or there is no head chunk, so nothing to m-map here. 
return nil
@@ -1469,13 +1869,13 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMap
 handleChunkWriteError(err)
 return nil
 }
- chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, 1)
+ chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, len(chks))
 for _, memchunk := range chks {
 if len(s.ooo.oooMmappedChunks) >= (oooChunkIDMask - 1) {
- level.Error(logger).Log("msg", "Too many OOO chunks, dropping data", "series", s.lset.String())
+ logger.Error("Too many OOO chunks, dropping data", "series", s.lset.String())
 break
 }
- chunkRef := chunkDiskMapper.WriteChunk(s.ref, s.ooo.oooHeadChunk.minTime, s.ooo.oooHeadChunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
+ chunkRef := chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
 chunkRefs = append(chunkRefs, chunkRef)
 s.ooo.oooMmappedChunks = append(s.ooo.oooMmappedChunks, &mmappedChunk{
 ref: chunkRef,
diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go
index a037948100..dc682602b1 100644
--- a/tsdb/head_bench_test.go
+++ b/tsdb/head_bench_test.go
@@ -14,15 +14,22 @@ package tsdb
 import (
+ "context"
 "errors"
+ "fmt"
+ "math/rand"
 "strconv"
 "testing"

 "github.com/stretchr/testify/require"
 "go.uber.org/atomic"

+ "github.com/prometheus/prometheus/model/exemplar"
+ "github.com/prometheus/prometheus/model/histogram"
 "github.com/prometheus/prometheus/model/labels"
+ "github.com/prometheus/prometheus/storage"
 "github.com/prometheus/prometheus/tsdb/chunks"
+ "github.com/prometheus/prometheus/tsdb/wlog"
 )

 func BenchmarkHeadStripeSeriesCreate(b *testing.B) {
@@ -79,6 +86,86 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) {
 }
 }

+func BenchmarkHead_WalCommit(b *testing.B) {
+ seriesCounts := []int{100, 1000, 10000}
+ series := genSeries(10000, 10, 0, 0) // Only using the generated labels.
+
+ appendSamples := func(b *testing.B, app storage.Appender, seriesCount int, ts int64) {
+ var err error
+ for i, s := range series[:seriesCount] {
+ var ref storage.SeriesRef
+ // If i is even, append a float sample; otherwise, append a histogram.
+ if i%2 == 0 {
+ ref, err = app.Append(ref, s.Labels(), ts, float64(ts))
+ } else {
+ h := &histogram.Histogram{
+ Count: 7 + uint64(ts*5),
+ ZeroCount: 2 + uint64(ts),
+ ZeroThreshold: 0.001,
+ Sum: 18.4 * rand.Float64(),
+ Schema: 1,
+ PositiveSpans: []histogram.Span{
+ {Offset: 0, Length: 2},
+ {Offset: 1, Length: 2},
+ },
+ PositiveBuckets: []int64{ts + 1, 1, -1, 0},
+ }
+ ref, err = app.AppendHistogram(ref, s.Labels(), ts, h, nil)
+ }
+ require.NoError(b, err)
+
+ _, err = app.AppendExemplar(ref, s.Labels(), exemplar.Exemplar{
+ Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())),
+ Value: rand.Float64(),
+ Ts: ts,
+ })
+ require.NoError(b, err)
+ }
+ }
+
+ for _, seriesCount := range seriesCounts {
+ b.Run(fmt.Sprintf("%d series", seriesCount), func(b *testing.B) {
+ for _, commits := range []int64{1, 2} { // To test commits that create new series and commits where the series already exist.
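+ // commits == 1 measures a Commit() that also creates every series;
+ // commits == 2 additionally measures a second Commit() against the
+ // now-existing series, where no new series records should be written.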
+ b.Run(fmt.Sprintf("%d commits", commits), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + h, w := newTestHead(b, 10000, wlog.CompressionNone, false) + b.Cleanup(func() { + if h != nil { + h.Close() + } + if w != nil { + w.Close() + } + }) + app := h.Appender(context.Background()) + + appendSamples(b, app, seriesCount, 0) + + b.StartTimer() + require.NoError(b, app.Commit()) + if commits == 2 { + b.StopTimer() + app = h.Appender(context.Background()) + appendSamples(b, app, seriesCount, 1) + b.StartTimer() + require.NoError(b, app.Commit()) + } + b.StopTimer() + h.Close() + h = nil + w.Close() + w = nil + } + }) + } + }) + } +} + type failingSeriesLifecycleCallback struct{} func (failingSeriesLifecycleCallback) PreCreation(labels.Labels) error { return errors.New("failed") } diff --git a/tsdb/head_dedupelabels.go b/tsdb/head_dedupelabels.go index a16d907261..a75f337224 100644 --- a/tsdb/head_dedupelabels.go +++ b/tsdb/head_dedupelabels.go @@ -16,8 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -31,8 +30,8 @@ func (s *memSeries) labels() labels.Labels { // RebuildSymbolTable goes through all the series in h, build a SymbolTable with all names and values, // replace each series' Labels with one using that SymbolTable. -func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { - level.Info(logger).Log("msg", "RebuildSymbolTable starting") +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { + logger.Info("RebuildSymbolTable starting") st := labels.NewSymbolTable() builder := labels.NewScratchBuilderWithSymbolTable(st, 0) rebuildLabels := func(lbls labels.Labels) labels.Labels { @@ -66,7 +65,7 @@ func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { if e, ok := h.exemplars.(withReset); ok { e.ResetSymbolTable(st) } - level.Info(logger).Log("msg", "RebuildSymbolTable finished", "size", st.Len()) + logger.Info("RebuildSymbolTable finished", "size", st.Len()) return st } diff --git a/tsdb/head_other.go b/tsdb/head_other.go index fea91530dc..c73872c12e 100644 --- a/tsdb/head_other.go +++ b/tsdb/head_other.go @@ -16,7 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -27,6 +27,6 @@ func (s *memSeries) labels() labels.Labels { } // RebuildSymbolTable is a no-op when not using dedupelabels. 
-func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { return nil } diff --git a/tsdb/head_read.go b/tsdb/head_read.go index d81ffbb6a0..29adc3ee74 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -21,8 +21,6 @@ import ( "slices" "sync" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -132,7 +130,7 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") } else { series = append(series, s) } @@ -165,7 +163,7 @@ func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCou for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") continue } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 7b5349cfca..cc9daa97fe 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -23,6 +23,7 @@ import ( "path" "path/filepath" "reflect" + "slices" "sort" "strconv" "strings" @@ -1060,7 +1061,7 @@ func TestMemSeries_truncateChunks_scenarios(t *testing.T) { tests := []struct { name string - headChunks int // the number of head chubks to create on memSeries by appending enough samples + headChunks int // the number of head chunks to create on memSeries by appending enough samples mmappedChunks int // the number of mmapped chunks to create on memSeries by appending enough samples truncateBefore int64 // the mint to pass to truncateChunksBefore() expectedTruncated int // the number of chunks that we're expecting be truncated and returned by truncateChunksBefore() @@ -2101,6 +2102,36 @@ func TestHead_LogRollback(t *testing.T) { } } +func TestHead_ReturnsSortedLabelValues(t *testing.T) { + h, _ := newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + + h.initTime(0) + + app := h.appender() + for i := 100; i > 0; i-- { + for j := 0; j < 10; j++ { + lset := labels.FromStrings( + "__name__", fmt.Sprintf("metric_%d", i), + "label", fmt.Sprintf("value_%d", j), + ) + _, err := app.Append(0, lset, 2100, 1) + require.NoError(t, err) + } + } + + q, err := NewBlockQuerier(h, 1500, 2500) + require.NoError(t, err) + + res, _, err := q.LabelValues(context.Background(), "__name__", nil) + require.NoError(t, err) + + require.True(t, slices.IsSorted(res)) + require.NoError(t, q.Close()) +} + // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. 
func TestWalRepair_DecodingError(t *testing.T) { @@ -2383,8 +2414,7 @@ func TestAddDuplicateLabelName(t *testing.T) { add := func(labels labels.Labels, labelName string) { app := h.Appender(context.Background()) _, err := app.Append(0, labels, 0, 0) - require.Error(t, err) - require.Equal(t, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName), err.Error()) + require.EqualError(t, err, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName)) } add(labels.FromStrings("a", "c", "a", "b"), "a") @@ -2692,15 +2722,32 @@ func TestIsolationWithoutAdd(t *testing.T) { func TestOutOfOrderSamplesMetric(t *testing.T) { for name, scenario := range sampleTypeScenarios { t.Run(name, func(t *testing.T) { - testOutOfOrderSamplesMetric(t, scenario) + options := DefaultOptions() + options.EnableNativeHistograms = true + options.EnableOOONativeHistograms = true + testOutOfOrderSamplesMetric(t, scenario, options, storage.ErrOutOfOrderSample) }) } } -func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { - dir := t.TempDir() +func TestOutOfOrderSamplesMetricNativeHistogramOOODisabled(t *testing.T) { + for name, scenario := range sampleTypeScenarios { + if scenario.sampleType != "histogram" { + continue + } + t.Run(name, func(t *testing.T) { + options := DefaultOptions() + options.OutOfOrderTimeWindow = (1000 * time.Minute).Milliseconds() + options.EnableNativeHistograms = true + options.EnableOOONativeHistograms = false + testOutOfOrderSamplesMetric(t, scenario, options, storage.ErrOOONativeHistogramsDisabled) + }) + } +} - db, err := Open(dir, nil, nil, DefaultOptions(), nil) +func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, options *Options, expectOutOfOrderError error) { + dir := t.TempDir() + db, err := Open(dir, nil, nil, options, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) @@ -2724,15 +2771,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) app = db.Appender(ctx) _, err = appendSample(app, 2) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, 3) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, 4) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.NoError(t, app.Commit()) @@ -2767,15 +2814,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { // Test out of order metric. 
app = db.Appender(ctx) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+2) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+3) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+4) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.NoError(t, app.Commit()) } @@ -4626,10 +4673,172 @@ func TestHistogramCounterResetHeader(t *testing.T) { } } +func TestOOOHistogramCounterResetHeaders(t *testing.T) { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { + l := labels.FromStrings("a", "b") + head, _ := newTestHead(t, 1000, wlog.CompressionNone, true) + head.opts.OutOfOrderCapMax.Store(5) + head.opts.EnableOOONativeHistograms.Store(true) + + t.Cleanup(func() { + require.NoError(t, head.Close()) + }) + require.NoError(t, head.Init(0)) + + appendHistogram := func(ts int64, h *histogram.Histogram) { + app := head.Appender(context.Background()) + var err error + if floatHisto { + _, err = app.AppendHistogram(0, l, ts, nil, h.ToFloat(nil)) + } else { + _, err = app.AppendHistogram(0, l, ts, h.Copy(), nil) + } + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + type expOOOMmappedChunks struct { + header chunkenc.CounterResetHeader + mint, maxt int64 + numSamples uint16 + } + + var expChunks []expOOOMmappedChunks + checkOOOExpCounterResetHeader := func(newChunks ...expOOOMmappedChunks) { + expChunks = append(expChunks, newChunks...) + + ms, _, err := head.getOrCreate(l.Hash(), l) + require.NoError(t, err) + + require.Len(t, ms.ooo.oooMmappedChunks, len(expChunks)) + + for i, mmapChunk := range ms.ooo.oooMmappedChunks { + chk, err := head.chunkDiskMapper.Chunk(mmapChunk.ref) + require.NoError(t, err) + if floatHisto { + require.Equal(t, expChunks[i].header, chk.(*chunkenc.FloatHistogramChunk).GetCounterResetHeader()) + } else { + require.Equal(t, expChunks[i].header, chk.(*chunkenc.HistogramChunk).GetCounterResetHeader()) + } + require.Equal(t, expChunks[i].mint, mmapChunk.minTime) + require.Equal(t, expChunks[i].maxt, mmapChunk.maxTime) + require.Equal(t, expChunks[i].numSamples, mmapChunk.numSamples) + } + } + + // Append an in-order histogram, so the rest of the samples can be detected as OOO. + appendHistogram(1000, tsdbutil.GenerateTestHistogram(1000)) + + // OOO histogram + for i := 1; i <= 5; i++ { + appendHistogram(100+int64(i), tsdbutil.GenerateTestHistogram(1000+i)) + } + // Nothing mmapped yet. + checkOOOExpCounterResetHeader() + + // 6th observation (which triggers a head chunk mmapping). + appendHistogram(int64(112), tsdbutil.GenerateTestHistogram(1002)) + + // One mmapped chunk with (ts, val) [(101, 1001), (102, 1002), (103, 1003), (104, 1004), (105, 1005)]. 
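+ // With OutOfOrderCapMax set to 5 above, every 6th OOO sample for the
+ // series cuts a new OOO head chunk and m-maps the previous one, which is
+ // what makes these chunks observable here.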
+ checkOOOExpCounterResetHeader(expOOOMmappedChunks{
+ header: chunkenc.UnknownCounterReset,
+ mint: 101,
+ maxt: 105,
+ numSamples: 5,
+ })
+
+ // Add more samples; there's a counter reset at ts 122.
+ appendHistogram(int64(110), tsdbutil.GenerateTestHistogram(1001))
+ appendHistogram(int64(124), tsdbutil.GenerateTestHistogram(904))
+ appendHistogram(int64(123), tsdbutil.GenerateTestHistogram(903))
+ appendHistogram(int64(122), tsdbutil.GenerateTestHistogram(902))
+
+ // New samples not mmapped yet.
+ checkOOOExpCounterResetHeader()
+
+ // 11th observation (which triggers another head chunk mmapping).
+ appendHistogram(int64(200), tsdbutil.GenerateTestHistogram(2000))
+
+ // Two new mmapped chunks [(110, 1001), (112, 1002)], [(122, 902), (123, 903), (124, 904)].
+ checkOOOExpCounterResetHeader(
+ expOOOMmappedChunks{
+ header: chunkenc.UnknownCounterReset,
+ mint: 110,
+ maxt: 112,
+ numSamples: 2,
+ },
+ expOOOMmappedChunks{
+ header: chunkenc.CounterReset,
+ mint: 122,
+ maxt: 124,
+ numSamples: 3,
+ },
+ )
+
+ // The count is lower than in the previous sample at ts 200, and NotCounterReset is always ignored on append.
+ appendHistogram(int64(205), tsdbutil.SetHistogramNotCounterReset(tsdbutil.GenerateTestHistogram(1000)))
+
+ appendHistogram(int64(210), tsdbutil.SetHistogramCounterReset(tsdbutil.GenerateTestHistogram(2010)))
+
+ appendHistogram(int64(220), tsdbutil.GenerateTestHistogram(2020))
+
+ appendHistogram(int64(215), tsdbutil.GenerateTestHistogram(2005))
+
+ // 16th observation (which triggers another head chunk mmapping).
+ appendHistogram(int64(350), tsdbutil.GenerateTestHistogram(4000))
+
+ // Four new mmapped chunks: [(200, 2000)], [(205, 1000)], [(210, 2010)], [(215, 2005), (220, 2020)].
+ checkOOOExpCounterResetHeader(
+ expOOOMmappedChunks{
+ header: chunkenc.UnknownCounterReset,
+ mint: 200,
+ maxt: 200,
+ numSamples: 1,
+ },
+ expOOOMmappedChunks{
+ header: chunkenc.CounterReset,
+ mint: 205,
+ maxt: 205,
+ numSamples: 1,
+ },
+ expOOOMmappedChunks{
+ header: chunkenc.CounterReset,
+ mint: 210,
+ maxt: 210,
+ numSamples: 1,
+ },
+ expOOOMmappedChunks{
+ header: chunkenc.CounterReset,
+ mint: 215,
+ maxt: 220,
+ numSamples: 2,
+ },
+ )
+
+ // Adding five more samples (21 in total), so another mmapped chunk is created.
+ appendHistogram(300, tsdbutil.SetHistogramCounterReset(tsdbutil.GenerateTestHistogram(3000)))
+
+ for i := 1; i <= 4; i++ {
+ appendHistogram(300+int64(i), tsdbutil.GenerateTestHistogram(3000+i))
+ }
+
+ // One mmapped chunk with (ts, val) [(300, 3000), (301, 3001), (302, 3002), (303, 3003), (350, 4000)].
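+ // (350, 4000) lands in this chunk because it was still in the OOO head
+ // chunk when 300-303 were inserted around it; the 21st sample (304) then
+ // triggers the m-mapping. The CounterReset header follows from the
+ // explicit counter-reset hint on the first sample in the chunk (ts 300).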
+ checkOOOExpCounterResetHeader(expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 300, + maxt: 350, + numSamples: 5, + }) + }) + } +} + func TestAppendingDifferentEncodingToSameSeries(t *testing.T) { dir := t.TempDir() opts := DefaultOptions() opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) t.Cleanup(func() { @@ -4900,6 +5109,8 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { opts.ChunkRange = 1000 opts.ChunkDirRoot = dir opts.OutOfOrderTimeWindow.Store(30 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -4909,13 +5120,12 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { l := labels.FromStrings("foo", "bar") appendSample := func(mins int64, val float64, isOOO bool) { app := h.Appender(context.Background()) - ts, v := mins*time.Minute.Milliseconds(), val - _, err := app.Append(0, l, ts, v) + _, s, err := scenario.appendFunc(app, l, mins*time.Minute.Milliseconds(), mins) require.NoError(t, err) require.NoError(t, app.Commit()) if isOOO { - expOOOSamples = append(expOOOSamples, sample{t: ts, f: v}) + expOOOSamples = append(expOOOSamples, s) } } @@ -4968,7 +5178,7 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { // Passing in true for the 'ignoreCounterResets' parameter prevents differences in counter reset headers // from being factored in to the sample comparison // TODO(fionaliao): understand counter reset behaviour, might want to modify this later - requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, true) + requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, requireEqualSamplesIgnoreCounterResets) require.NoError(t, h.Close()) } @@ -4994,6 +5204,8 @@ func testOOOMmapReplay(t *testing.T, scenario sampleTypeScenario) { opts.ChunkDirRoot = dir opts.OutOfOrderCapMax.Store(30) opts.OutOfOrderTimeWindow.Store(1000 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -5295,6 +5507,8 @@ func testOOOAppendWithNoSeries(t *testing.T, appendFunc func(appender storage.Ap opts.ChunkDirRoot = dir opts.OutOfOrderCapMax.Store(30) opts.OutOfOrderTimeWindow.Store(120 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -5368,7 +5582,9 @@ func testOOOAppendWithNoSeries(t *testing.T, appendFunc func(appender storage.Ap func TestHeadMinOOOTimeUpdate(t *testing.T) { for name, scenario := range sampleTypeScenarios { t.Run(name, func(t *testing.T) { - testHeadMinOOOTimeUpdate(t, scenario) + if scenario.sampleType == sampleMetricTypeFloat { + testHeadMinOOOTimeUpdate(t, scenario) + } }) } } @@ -5383,6 +5599,8 @@ func testHeadMinOOOTimeUpdate(t *testing.T, scenario sampleTypeScenario) { opts := DefaultHeadOptions() opts.ChunkDirRoot = dir opts.OutOfOrderTimeWindow.Store(10 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -6062,11 +6280,15 @@ func TestHeadAppender_AppendFloatWithSameTimestampAsPreviousHistogram(t *testing require.ErrorIs(t, err, 
storage.NewDuplicateHistogramToFloatErr(2_000, 10.0)) } -func TestHeadAppender_AppendCTZeroSample(t *testing.T) { +func TestHeadAppender_AppendCT(t *testing.T) { + testHistogram := tsdbutil.GenerateTestHistogram(1) + testFloatHistogram := tsdbutil.GenerateTestFloatHistogram(1) type appendableSamples struct { - ts int64 - val float64 - ct int64 + ts int64 + fSample float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + ct int64 } for _, tc := range []struct { name string @@ -6074,20 +6296,10 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { expectedSamples []chunks.Sample }{ { - name: "In order ct+normal sample", + name: "In order ct+normal sample/floatSample", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - }, - expectedSamples: []chunks.Sample{ - sample{t: 1, f: 0}, - sample{t: 100, f: 10}, - }, - }, - { - name: "Consecutive appends with same ct ignore ct", - appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 1}, + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6096,10 +6308,86 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "Consecutive appends with newer ct do not ignore ct", + name: "In order ct+normal sample/histogram", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 102, val: 10, ct: 101}, + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "In order ct+normal sample/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with same ct ignore ct/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, + sample{t: 101, f: 10}, + }, + }, + { + name: "Consecutive appends with same ct ignore ct/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with same ct ignore ct/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: 
&histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 102, fSample: 10, ct: 101}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6109,10 +6397,36 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "CT equals to previous sample timestamp is ignored", + name: "Consecutive appends with newer ct do not ignore ct/histogram", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 100}, + {ts: 100, h: testHistogram, ct: 1}, + {ts: 102, h: testHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &histogram.Histogram{CounterResetHint: histogram.CounterReset}}, + sample{t: 102, h: testHistogram}, + }, + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 102, fh: testFloatHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &histogram.FloatHistogram{CounterResetHint: histogram.CounterReset}}, + sample{t: 102, fh: testFloatHistogram}, + }, + }, + { + name: "CT equals to previous sample timestamp is ignored/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 100}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6120,6 +6434,38 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { sample{t: 101, f: 10}, }, }, + { + name: "CT equals to previous sample timestamp is ignored/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "CT equals to previous sample timestamp is ignored/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, } { t.Run(tc.name, func(t *testing.T) { h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) @@ -6129,10 +6475,21 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { a := h.Appender(context.Background()) lbls := labels.FromStrings("foo", "bar") for _, sample := range tc.appendableSamples { - _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) - require.NoError(t, err) - _, err = a.Append(0, lbls, sample.ts, sample.val) - require.NoError(t, err) + // Append float if it's a float test case + if sample.fSample != 0 { + _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) + require.NoError(t, 
err) + _, err = a.Append(0, lbls, sample.ts, sample.fSample) + require.NoError(t, err) + } + + // Append histograms if it's a histogram test case + if sample.h != nil || sample.fh != nil { + ref, err := a.AppendHistogramCTZeroSample(0, lbls, sample.ts, sample.ct, sample.h, sample.fh) + require.NoError(t, err) + _, err = a.AppendHistogram(ref, lbls, sample.ts, sample.h, sample.fh) + require.NoError(t, err) + } } require.NoError(t, a.Commit()) @@ -6167,3 +6524,60 @@ func (c *countSeriesLifecycleCallback) PostCreation(labels.Labels) { c.crea func (c *countSeriesLifecycleCallback) PostDeletion(s map[chunks.HeadSeriesRef]labels.Labels) { c.deleted.Add(int64(len(s))) } + +// Regression test for data race https://github.com/prometheus/prometheus/issues/15139. +func TestHeadAppendHistogramAndCommitConcurrency(t *testing.T) { + h := tsdbutil.GenerateTestHistogram(1) + fh := tsdbutil.GenerateTestFloatHistogram(1) + + testCases := map[string]func(storage.Appender, int) error{ + "integer histogram": func(app storage.Appender, i int) error { + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar", "serial", strconv.Itoa(i)), 1, h, nil) + return err + }, + "float histogram": func(app storage.Appender, i int) error { + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar", "serial", strconv.Itoa(i)), 1, nil, fh) + return err + }, + } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + testHeadAppendHistogramAndCommitConcurrency(t, tc) + }) + } +} + +func testHeadAppendHistogramAndCommitConcurrency(t *testing.T, appendFn func(storage.Appender, int) error) { + head, _ := newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, head.Close()) + }() + + wg := sync.WaitGroup{} + wg.Add(2) + + // How this works: Commit() should be atomic, thus one of the commits will + // be first and the other second. The first commit will create a new series + // and write a sample. The second commit will see an exact duplicate sample + // which it should ignore. Unless there's a race that causes the + // memSeries.lastHistogram to be corrupt and fail the duplicate check. + go func() { + defer wg.Done() + for i := 0; i < 10000; i++ { + app := head.Appender(context.Background()) + require.NoError(t, appendFn(app, i)) + require.NoError(t, app.Commit()) + } + }() + + go func() { + defer wg.Done() + for i := 0; i < 10000; i++ { + app := head.Appender(context.Background()) + require.NoError(t, appendFn(app, i)) + require.NoError(t, app.Commit()) + } + }() + + wg.Wait() +} diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index ef96b53305..8103926dc6 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -24,7 +24,6 @@ import ( "sync" "time" - "github.com/go-kit/log/level" "go.uber.org/atomic" "github.com/prometheus/prometheus/model/exemplar" @@ -128,7 +127,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch // replaying the WAL, so lets just log the error if it's not that type. 
err = h.exemplars.AddExemplar(ms.labels(), exemplar.Exemplar{Ts: e.T, Value: e.V, Labels: e.Labels}) if err != nil && errors.Is(err, storage.ErrOutOfOrderExemplar) { - level.Warn(h.logger).Log("msg", "Unexpected error when replaying WAL on exemplar record", "err", err) + h.logger.Warn("Unexpected error when replaying WAL on exemplar record", "err", err) } } }(exemplarsInput) @@ -421,8 +420,8 @@ Outer: } if unknownRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load() > 0 { - level.Warn(h.logger).Log( - "msg", "Unknown series references", + h.logger.Warn( + "Unknown series references", "samples", unknownRefs.Load(), "exemplars", unknownExemplarRefs.Load(), "histograms", unknownHistogramRefs.Load(), @@ -430,7 +429,7 @@ Outer: ) } if count := mmapOverlappingChunks.Load(); count > 0 { - level.Info(h.logger).Log("msg", "Overlapping m-map chunks on duplicate series records", "count", count) + h.logger.Info("Overlapping m-map chunks on duplicate series records", "count", count) } return nil } @@ -446,8 +445,8 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*m mmc[0].minTime, mmc[len(mmc)-1].maxTime, ) { - level.Debug(h.logger).Log( - "msg", "M-mapped chunks overlap on a duplicate series record", + h.logger.Debug( + "M-mapped chunks overlap on a duplicate series record", "series", mSeries.labels().String(), "oldref", mSeries.ref, "oldmint", mSeries.mmappedChunks[0].minTime, @@ -646,9 +645,9 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp } func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, lastMmapRef chunks.ChunkDiskMapperRef) (err error) { - // Track number of samples, m-map markers, that referenced a series we don't know about + // Track number of samples, histogram samples, m-map markers, that referenced a series we don't know about // for error reporting. - var unknownRefs, mmapMarkerUnknownRefs atomic.Uint64 + var unknownRefs, unknownHistogramRefs, mmapMarkerUnknownRefs atomic.Uint64 lastSeq, lastOff := lastMmapRef.Unpack() // Start workers that each process samples for a partition of the series ID space. 
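As the hunks above and below show, this file is also migrated from go-kit logging to the standard library's log/slog. A minimal, self-contained sketch of the call-site translation (a standalone example, not part of the patch):

package main

import (
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	// Before, with github.com/go-kit/log/level:
	//   level.Warn(logger).Log("msg", "Unknown series references", "samples", 3)
	// After, with log/slog, as used throughout this patch:
	logger.Warn("Unknown series references", "samples", 3)
}
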
@@ -657,8 +656,9 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch concurrency = h.opts.WALReplayConcurrency processors = make([]wblSubsetProcessor, concurrency) - dec = record.NewDecoder(syms) - shards = make([][]record.RefSample, concurrency) + dec record.Decoder + shards = make([][]record.RefSample, concurrency) + histogramShards = make([][]histogramRecord, concurrency) decodedCh = make(chan interface{}, 10) decodeErr error @@ -672,6 +672,16 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch return []record.RefMmapMarker{} }, } + histogramSamplesPool = sync.Pool{ + New: func() interface{} { + return []record.RefHistogramSample{} + }, + } + floatHistogramSamplesPool = sync.Pool{ + New: func() interface{} { + return []record.RefFloatHistogramSample{} + }, + } ) defer func() { @@ -692,8 +702,9 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch processors[i].setup() go func(wp *wblSubsetProcessor) { - unknown := wp.processWBLSamples(h) + unknown, unknownHistograms := wp.processWBLSamples(h) unknownRefs.Add(unknown) + unknownHistogramRefs.Add(unknownHistograms) wg.Done() }(&processors[i]) } @@ -727,6 +738,30 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch return } decodedCh <- markers + case record.HistogramSamples: + hists := histogramSamplesPool.Get().([]record.RefHistogramSample)[:0] + hists, err = dec.HistogramSamples(rec, hists) + if err != nil { + decodeErr = &wlog.CorruptionErr{ + Err: fmt.Errorf("decode histograms: %w", err), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- hists + case record.FloatHistogramSamples: + hists := floatHistogramSamplesPool.Get().([]record.RefFloatHistogramSample)[:0] + hists, err = dec.FloatHistogramSamples(rec, hists) + if err != nil { + decodeErr = &wlog.CorruptionErr{ + Err: fmt.Errorf("decode float histograms: %w", err), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- hists default: // Noop. } @@ -791,6 +826,70 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch idx := uint64(ms.ref) % uint64(concurrency) processors[idx].input <- wblSubsetProcessorInputItem{mmappedSeries: ms} } + case []record.RefHistogramSample: + samples := v + // We split up the samples into chunks of 5000 samples or less. + // With O(300 * #cores) in-flight sample batches, large scrapes could otherwise + // cause thousands of very large in flight buffers occupying large amounts + // of unused memory. + for len(samples) > 0 { + m := 5000 + if len(samples) < m { + m = len(samples) + } + for i := 0; i < concurrency; i++ { + if histogramShards[i] == nil { + histogramShards[i] = processors[i].reuseHistogramBuf() + } + } + for _, sam := range samples[:m] { + if r, ok := multiRef[sam.Ref]; ok { + sam.Ref = r + } + mod := uint64(sam.Ref) % uint64(concurrency) + histogramShards[mod] = append(histogramShards[mod], histogramRecord{ref: sam.Ref, t: sam.T, h: sam.H}) + } + for i := 0; i < concurrency; i++ { + if len(histogramShards[i]) > 0 { + processors[i].input <- wblSubsetProcessorInputItem{histogramSamples: histogramShards[i]} + histogramShards[i] = nil + } + } + samples = samples[m:] + } + histogramSamplesPool.Put(v) //nolint:staticcheck + case []record.RefFloatHistogramSample: + samples := v + // We split up the samples into chunks of 5000 samples or less. 
+ // With O(300 * #cores) in-flight sample batches, large scrapes could otherwise + // cause thousands of very large in flight buffers occupying large amounts + // of unused memory. + for len(samples) > 0 { + m := 5000 + if len(samples) < m { + m = len(samples) + } + for i := 0; i < concurrency; i++ { + if histogramShards[i] == nil { + histogramShards[i] = processors[i].reuseHistogramBuf() + } + } + for _, sam := range samples[:m] { + if r, ok := multiRef[sam.Ref]; ok { + sam.Ref = r + } + mod := uint64(sam.Ref) % uint64(concurrency) + histogramShards[mod] = append(histogramShards[mod], histogramRecord{ref: sam.Ref, t: sam.T, fh: sam.FH}) + } + for i := 0; i < concurrency; i++ { + if len(histogramShards[i]) > 0 { + processors[i].input <- wblSubsetProcessorInputItem{histogramSamples: histogramShards[i]} + histogramShards[i] = nil + } + } + samples = samples[m:] + } + floatHistogramSamplesPool.Put(v) //nolint:staticcheck default: panic(fmt.Errorf("unexpected decodedCh type: %T", d)) } @@ -811,7 +910,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 { - level.Warn(h.logger).Log("msg", "Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) + h.logger.Warn("Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) } return nil } @@ -833,17 +932,20 @@ func (e errLoadWbl) Unwrap() error { } type wblSubsetProcessor struct { - input chan wblSubsetProcessorInputItem - output chan []record.RefSample + input chan wblSubsetProcessorInputItem + output chan []record.RefSample + histogramsOutput chan []histogramRecord } type wblSubsetProcessorInputItem struct { - mmappedSeries *memSeries - samples []record.RefSample + mmappedSeries *memSeries + samples []record.RefSample + histogramSamples []histogramRecord } func (wp *wblSubsetProcessor) setup() { wp.output = make(chan []record.RefSample, 300) + wp.histogramsOutput = make(chan []histogramRecord, 300) wp.input = make(chan wblSubsetProcessorInputItem, 300) } @@ -851,6 +953,8 @@ func (wp *wblSubsetProcessor) closeAndDrain() { close(wp.input) for range wp.output { } + for range wp.histogramsOutput { + } } // If there is a buffer in the output chan, return it for reuse, otherwise return nil. @@ -863,10 +967,21 @@ func (wp *wblSubsetProcessor) reuseBuf() []record.RefSample { return nil } +// If there is a buffer in the output chan, return it for reuse, otherwise return nil. +func (wp *wblSubsetProcessor) reuseHistogramBuf() []histogramRecord { + select { + case buf := <-wp.histogramsOutput: + return buf[:0] + default: + } + return nil +} + // processWBLSamples adds the samples it receives to the head and passes // the buffer received to an output channel for reuse. -func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) { +func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHistogramRefs uint64) { defer close(wp.output) + defer close(wp.histogramsOutput) oooCapMax := h.opts.OutOfOrderCapMax.Load() // We don't check for minValidTime for ooo samples. 
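The WBL replay path above routes every decoded histogram sample to a worker by series reference, in batches of at most 5000, so all samples of one series are replayed by the same worker in order. A minimal standalone sketch of that routing rule (the batch size and modulo sharding are taken from the code above; the helper name is ours):

package main

import "fmt"

// shardFor mirrors the replay routing: a sample for series ref goes to
// worker ref % concurrency, keeping each series on a single worker.
func shardFor(ref uint64, concurrency int) int {
	return int(ref % uint64(concurrency))
}

func main() {
	const concurrency = 4
	for _, ref := range []uint64{1, 2, 5, 9} {
		fmt.Printf("series %d -> worker %d\n", ref, shardFor(ref, concurrency))
	}
}
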
@@ -905,11 +1020,41 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) { case wp.output <- in.samples: default: } + for _, s := range in.histogramSamples { + ms := h.series.getByID(s.ref) + if ms == nil { + unknownHistogramRefs++ + continue + } + var chunkCreated bool + var ok bool + if s.h != nil { + ok, chunkCreated, _ = ms.insert(s.t, 0, s.h, nil, h.chunkDiskMapper, oooCapMax, h.logger) + } else { + ok, chunkCreated, _ = ms.insert(s.t, 0, nil, s.fh, h.chunkDiskMapper, oooCapMax, h.logger) + } + if chunkCreated { + h.metrics.chunksCreated.Inc() + h.metrics.chunks.Inc() + } + if ok { + if s.t > maxt { + maxt = s.t + } + if s.t < mint { + mint = s.t + } + } + } + select { + case wp.histogramsOutput <- in.histogramSamples: + default: + } } h.updateMinOOOMaxOOOTime(mint, maxt) - return unknownRefs + return unknownRefs, unknownHistogramRefs } const ( @@ -1066,7 +1211,7 @@ const chunkSnapshotPrefix = "chunk_snapshot." func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { if h.wal == nil { // If we are not storing any WAL, does not make sense to take a snapshot too. - level.Warn(h.logger).Log("msg", "skipping chunk snapshotting as WAL is disabled") + h.logger.Warn("skipping chunk snapshotting as WAL is disabled") return &ChunkSnapshotStats{}, nil } h.chunkSnapshotMtx.Lock() @@ -1215,7 +1360,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { // Leftover old chunk snapshots do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher chunk snapshot exists. - level.Error(h.logger).Log("msg", "delete old chunk snapshots", "err", err) + h.logger.Error("delete old chunk snapshots", "err", err) } return stats, nil } @@ -1225,12 +1370,12 @@ func chunkSnapshotDir(wlast, woffset int) string { } func (h *Head) performChunkSnapshot() error { - level.Info(h.logger).Log("msg", "creating chunk snapshot") + h.logger.Info("creating chunk snapshot") startTime := time.Now() stats, err := h.ChunkSnapshot() elapsed := time.Since(startTime) if err == nil { - level.Info(h.logger).Log("msg", "chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) + h.logger.Info("chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) } if err != nil { return fmt.Errorf("chunk snapshot: %w", err) @@ -1345,7 +1490,7 @@ func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSerie } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err) + h.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -1534,9 +1679,9 @@ Outer: } elapsed := time.Since(start) - level.Info(h.logger).Log("msg", "chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) + h.logger.Info("chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) if unknownRefs > 0 { - level.Warn(h.logger).Log("msg", "unknown series references during chunk snapshot replay", "count", unknownRefs) + h.logger.Warn("unknown series references during chunk snapshot replay", "count", unknownRefs) } return snapIdx, snapOffset, refSeries, nil diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 0e0e353719..8c0f698eae 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -43,10 +43,12 @@ const ( // HeaderLen represents number of bytes reserved of index for header. 
HeaderLen = 5 - // FormatV1 represents 1 version of index. + // FormatV1 represents version 1 of index. FormatV1 = 1 - // FormatV2 represents 2 version of index. + // FormatV2 represents version 2 of index. FormatV2 = 2 + // FormatV3 represents version 3 of index. + FormatV3 = 3 indexFilename = "index" @@ -436,7 +438,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return fmt.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q, last label set %q", lset, w.lastSeries) } if ref < w.lastSeriesRef && !w.lastSeries.IsEmpty() { @@ -1193,7 +1195,9 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { } r.version = int(r.b.Range(4, 5)[0]) - if r.version != FormatV1 && r.version != FormatV2 { + switch r.version { + case FormatV1, FormatV2, FormatV3: + default: return nil, fmt.Errorf("unknown index file version %d", r.version) } @@ -1351,7 +1355,9 @@ func (s Symbols) Lookup(o uint32) (string, error) { B: s.bs.Range(0, s.bs.Len()), } - if s.version == FormatV2 { + if s.version == FormatV1 { + d.Skip(int(o)) + } else { if int(o) >= s.seen { return "", fmt.Errorf("unknown symbol offset %d", o) } @@ -1360,8 +1366,6 @@ func (s Symbols) Lookup(o uint32) (string, error) { for i := o - (o / symbolFactor * symbolFactor); i > 0; i-- { d.UvarintBytes() } - } else { - d.Skip(int(o)) } sym := d.UvarintStr() if d.Err() != nil { @@ -1407,10 +1411,10 @@ func (s Symbols) ReverseLookup(sym string) (uint32, error) { if lastSymbol != sym { return 0, fmt.Errorf("unknown symbol %q", sym) } - if s.version == FormatV2 { - return uint32(res), nil + if s.version == FormatV1 { + return uint32(s.bs.Len() - lastLen), nil } - return uint32(s.bs.Len() - lastLen), nil + return uint32(res), nil } func (s Symbols) Size() int { @@ -1569,7 +1573,7 @@ func (r *Reader) LabelNamesFor(ctx context.Context, postings Postings) ([]string offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } @@ -1608,7 +1612,7 @@ func (r *Reader) LabelValueFor(ctx context.Context, id storage.SeriesRef, label offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -1634,7 +1638,7 @@ func (r *Reader) Series(id storage.SeriesRef, builder *labels.ScratchBuilder, ch offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. 
- if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -2063,5 +2067,5 @@ func (dec *Decoder) Series(b []byte, builder *labels.ScratchBuilder, chks *[]chu } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index bfe74c323d..5ed41f7698 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -345,13 +345,22 @@ func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) { p.mtx.Unlock() } +func appendWithExponentialGrowth[T any](a []T, v T) []T { + if cap(a) < len(a)+1 { + newList := make([]T, len(a), len(a)*2+1) + copy(newList, a) + a = newList + } + return append(a, v) +} + func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { nm, ok := p.m[l.Name] if !ok { nm = map[string][]storage.SeriesRef{} p.m[l.Name] = nm } - list := append(nm[l.Value], id) + list := appendWithExponentialGrowth(nm[l.Value], id) nm[l.Value] = list if !p.ordered { diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index 66ae93325d..26cd4d057e 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -35,6 +35,7 @@ var _ IndexReader = &HeadAndOOOIndexReader{} type HeadAndOOOIndexReader struct { *headIndexReader // A reference to the headIndexReader so we can reuse as many interface implementations as possible. + inoMint int64 lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef } @@ -49,13 +50,13 @@ func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator return storage.ChainSampleIteratorFromIterables(iterator, o.chunkIterables) } -func NewHeadAndOOOIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader { +func NewHeadAndOOOIndexReader(head *Head, inoMint, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader { hr := &headIndexReader{ head: head, mint: mint, maxt: maxt, } - return &HeadAndOOOIndexReader{hr, lastGarbageCollectedMmapRef} + return &HeadAndOOOIndexReader{hr, inoMint, lastGarbageCollectedMmapRef} } func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error { @@ -76,9 +77,9 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S *chks = (*chks)[:0] if s.ooo != nil { - return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks) + return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks) } - *chks = appendSeriesChunks(s, oh.mint, oh.maxt, *chks) + *chks = appendSeriesChunks(s, oh.inoMint, oh.maxt, *chks) return nil } @@ -87,7 +88,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S // // maxMmapRef tells up to which m-map chunk we can consider. If it is non-0, then // the oooHeadChunk will not be considered.
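Two changes in the hunks above deserve a note. yoloString now uses Go 1.20's unsafe.String(unsafe.SliceData(b), len(b)), which expresses the same zero-copy []byte-to-string conversion as the old slice-header cast without aliasing a slice header as a string header. And in postings.go, appendWithExponentialGrowth forces capacity to len*2+1 on every grow, so capacities run 1, 3, 7, ..., 2^k-1 and the total number of elements copied over n appends stays below roughly 2n; the runtime's own growth policy is more conservative for large slices, which made very large postings lists reallocate more often. The helper as added in the hunk, with a small capacity trace around it:

```go
package main

import "fmt"

// appendWithExponentialGrowth is the helper from the postings.go hunk above:
// on every grow it jumps straight to len*2+1 capacity, so each element is
// copied O(1) times amortized across a long run of appends.
func appendWithExponentialGrowth[T any](a []T, v T) []T {
	if cap(a) < len(a)+1 {
		newList := make([]T, len(a), len(a)*2+1)
		copy(newList, a)
		a = newList
	}
	return append(a, v)
}

func main() {
	var xs []int
	prevCap := cap(xs)
	for i := 0; i < 1_000_000; i++ {
		xs = appendWithExponentialGrowth(xs, i)
		if cap(xs) != prevCap { // roughly 20 reallocations for a million appends
			fmt.Printf("len %d -> cap %d\n", len(xs), cap(xs))
			prevCap = cap(xs)
		}
	}
}
```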
-func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, chks *[]chunks.Meta) error { +func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error { tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks)) addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) { @@ -111,7 +112,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap return nil } for _, chk := range chks { - addChunk(c.minTime, c.maxTime, ref, chk.chunk) + addChunk(chk.minTime, chk.maxTime, ref, chk.chunk) } } else { var emptyChunk chunkenc.Chunk @@ -128,7 +129,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap } if includeInOrder { - tmpChks = appendSeriesChunks(s, mint, maxt, tmpChks) + tmpChks = appendSeriesChunks(s, inoMint, maxt, tmpChks) } // There is nothing to do if we did not collect any chunk. @@ -476,7 +477,7 @@ func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *l return nil } - return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, chks) + return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks) } func (ir *OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { @@ -516,7 +517,7 @@ type HeadAndOOOQuerier struct { querier storage.Querier // Used for LabelNames, LabelValues, but may be nil if head was truncated in the mean time, in which case we ignore it and not close it in the end. } -func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier { +func NewHeadAndOOOQuerier(inoMint, mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier { cr := &headChunkReader{ head: head, mint: mint, @@ -527,7 +528,7 @@ func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolatio mint: mint, maxt: maxt, head: head, - index: NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef), + index: NewHeadAndOOOIndexReader(head, inoMint, mint, maxt, oooIsoState.minRef), chunkr: NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0), querier: querier, } @@ -568,7 +569,7 @@ type HeadAndOOOChunkQuerier struct { querier storage.ChunkQuerier } -func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier { +func NewHeadAndOOOChunkQuerier(inoMint, mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier { cr := &headChunkReader{ head: head, mint: mint, @@ -579,7 +580,7 @@ func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIso mint: mint, maxt: maxt, head: head, - index: NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef), + index: NewHeadAndOOOIndexReader(head, inoMint, mint, maxt, oooIsoState.minRef), chunkr: NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0), querier: querier, } diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index 40e37043b8..17f551dd7d 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -360,7 +360,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { }) } - ir := 
NewHeadAndOOOIndexReader(h, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(h, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder @@ -389,6 +389,7 @@ func TestOOOHeadChunkReader_LabelValues(t *testing.T) { func testOOOHeadChunkReader_LabelValues(t *testing.T, scenario sampleTypeScenario) { chunkRange := int64(2000) head, _ := newTestHead(t, chunkRange, wlog.CompressionNone, true) + head.opts.EnableOOONativeHistograms.Store(true) t.Cleanup(func() { require.NoError(t, head.Close()) }) ctx := context.Background() @@ -450,7 +451,7 @@ func testOOOHeadChunkReader_LabelValues(t *testing.T, scenario sampleTypeScenari for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { // We first want to test using a head index reader that covers the biggest query interval - oh := NewHeadAndOOOIndexReader(head, tc.queryMinT, tc.queryMaxT, 0) + oh := NewHeadAndOOOIndexReader(head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")} values, err := oh.LabelValues(ctx, "foo", matchers...) sort.Strings(values) @@ -493,6 +494,8 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderCapMax = 5 opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true s1 := labels.FromStrings("l", "v1") minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } @@ -854,7 +857,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { // The Series method populates the chunk metas, taking a copy of the // head OOO chunk if necessary. These are then used by the ChunkReader. - ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err = ir.Series(s1Ref, &b, &chks) @@ -875,7 +878,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { } resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } @@ -902,6 +905,8 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( opts := DefaultOptions() opts.OutOfOrderCapMax = 5 opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true s1 := labels.FromStrings("l", "v1") minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } @@ -1023,7 +1028,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( // The Series method populates the chunk metas, taking a copy of the // head OOO chunk if necessary. These are then used by the ChunkReader. 
- ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err = ir.Series(s1Ref, &b, &chks) @@ -1049,7 +1054,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( it := iterable.Iterator(nil) resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go index d3cd5f6016..b9badfea21 100644 --- a/tsdb/ooo_head_test.go +++ b/tsdb/ooo_head_test.go @@ -28,15 +28,14 @@ import ( const testMaxSize int = 32 // Formulas chosen to make testing easy. -func valEven(pos int) int { return pos*2 + 2 } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values -func valOdd(pos int) int { return pos*2 + 1 } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. +// Formulas chosen to make testing easy. +func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values +func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. -func samplify(v int) sample { return sample{int64(v), float64(v), nil, nil} } - -func makeEvenSampleSlice(n int) []sample { +func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample { s := make([]sample, n) for i := 0; i < n; i++ { - s[i] = samplify(valEven(i)) + s[i] = sampleFunc(valEven(i)) } return s } @@ -45,8 +44,36 @@ func makeEvenSampleSlice(n int) []sample { // - Number of pre-existing samples anywhere from 0 to testMaxSize-1. // - Insert new sample before first pre-existing samples, after the last, and anywhere in between. // - With a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. -// Note: In all samples used, t always equals v in numeric value. when we talk about 'value' we just refer to a value that will be used for both sample.t and sample.v. 
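The test refactors above, and the scenario tables in TestOOOInsert and TestOOOInsertDuplicate just below, all follow one pattern: the float-only samplify helper is replaced by an injected sample constructor, so a single test body covers floats, integer histograms, and float histograms. A compressed, runnable sketch of that injection pattern; the sample struct is a simplified stand-in and runScenarios is a hypothetical helper, not the real test code:

```go
package main

import "fmt"

// Simplified stand-ins for the tsdb test types: exactly one of f, h, fh is
// set per scenario, mirroring tsdb's sample struct.
type histogram struct{ count int }

type sample struct {
	t  int64
	f  float64
	h  *histogram
	fh *histogram
}

// runScenarios shows the injection pattern: the shared body builds and
// checks samples without knowing which flavour it is exercising.
func runScenarios(scenarios map[string]func(ts int64) sample) {
	for name, sampleFunc := range scenarios {
		s := make([]sample, 0, 4)
		for ts := int64(2); ts <= 8; ts += 2 { // "even" pre-existing values
			s = append(s, sampleFunc(ts))
		}
		fmt.Printf("%s: %d samples, first at t=%d\n", name, len(s), s[0].t)
	}
}

func main() {
	runScenarios(map[string]func(ts int64) sample{
		"float":             func(ts int64) sample { return sample{t: ts, f: float64(ts)} },
		"integer histogram": func(ts int64) sample { return sample{t: ts, h: &histogram{count: int(ts)}} },
		"float histogram":   func(ts int64) sample { return sample{t: ts, fh: &histogram{count: int(ts)}} },
	})
}
```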
func TestOOOInsert(t *testing.T) { + scenarios := map[string]struct { + sampleFunc func(ts int64) sample + }{ + "float": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOInsert(t, scenario.sampleFunc) + }) + } +} + +func testOOOInsert(t *testing.T, + sampleFunc func(ts int64) sample, +) { for numPreExisting := 0; numPreExisting <= testMaxSize; numPreExisting++ { // For example, if we have numPreExisting 2, then: // chunk.samples indexes filled 0 1 @@ -56,20 +83,21 @@ func TestOOOInsert(t *testing.T) { for insertPos := 0; insertPos <= numPreExisting; insertPos++ { chunk := NewOOOChunk() - chunk.samples = makeEvenSampleSlice(numPreExisting) - newSample := samplify(valOdd(insertPos)) - chunk.Insert(newSample.t, newSample.f, nil, nil) + chunk.samples = make([]sample, numPreExisting) + chunk.samples = makeEvenSampleSlice(numPreExisting, sampleFunc) + newSample := sampleFunc(valOdd(insertPos)) + chunk.Insert(newSample.t, newSample.f, newSample.h, newSample.fh) var expSamples []sample // Our expected new samples slice, will be first the original samples. for i := 0; i < insertPos; i++ { - expSamples = append(expSamples, samplify(valEven(i))) + expSamples = append(expSamples, sampleFunc(valEven(i))) } // Then the new sample. expSamples = append(expSamples, newSample) // Followed by any original samples that were pushed back by the new one. for i := insertPos; i < numPreExisting; i++ { - expSamples = append(expSamples, samplify(valEven(i))) + expSamples = append(expSamples, sampleFunc(valEven(i))) } require.Equal(t, expSamples, chunk.samples, "numPreExisting %d, insertPos %d", numPreExisting, insertPos) @@ -81,17 +109,46 @@ func TestOOOInsert(t *testing.T) { // pre-existing samples, with between 1 and testMaxSize pre-existing samples and // with a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. func TestOOOInsertDuplicate(t *testing.T) { + scenarios := map[string]struct { + sampleFunc func(ts int64) sample + }{ + "float": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOInsertDuplicate(t, scenario.sampleFunc) + }) + } +} + +func testOOOInsertDuplicate(t *testing.T, + sampleFunc func(ts int64) sample, +) { for num := 1; num <= testMaxSize; num++ { for dupPos := 0; dupPos < num; dupPos++ { chunk := NewOOOChunk() - chunk.samples = makeEvenSampleSlice(num) + chunk.samples = makeEvenSampleSlice(num, sampleFunc) dupSample := chunk.samples[dupPos] dupSample.f = 0.123 - ok := chunk.Insert(dupSample.t, dupSample.f, nil, nil) + ok := chunk.Insert(dupSample.t, dupSample.f, dupSample.h, dupSample.fh) - expSamples := makeEvenSampleSlice(num) // We expect no change. 
+ expSamples := makeEvenSampleSlice(num, sampleFunc) // We expect no change. require.False(t, ok) require.Equal(t, expSamples, chunk.samples, "num %d, dupPos %d", num, dupPos) } diff --git a/tsdb/querier.go b/tsdb/querier.go index 912c950329..b80faf881e 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -254,6 +254,10 @@ func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matc return nil, err } its = append(its, allPostings) + case m.Type == labels.MatchRegexp && m.Value == ".*": + // .* regexp matches any string: do nothing. + case m.Type == labels.MatchNotRegexp && m.Value == ".*": + return index.EmptyPostings(), nil case labelMustBeSet[m.Name]: // If this matcher must be non-empty, we can be smarter. matchesEmpty := m.Matches("") @@ -1018,9 +1022,9 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { if newChunk != nil { if !recoded { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) + cmint = t } currentChunk = newChunk - cmint = t } cmaxt = t diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 43accc253b..33dca1284d 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -105,17 +105,17 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { jFoo := labels.MustNewMatcher(labels.MatchEqual, "j", "foo") jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") - i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$") - iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$") - iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$") - iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$") - i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$") - iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") + i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.*") + iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1") + iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1.*") + iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+") + i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") + iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "") iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "") iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "i", "2"+postingsBenchSuffix) - iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$") - iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$") + iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "2.*") + iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*2.*") jFooBar := labels.MustNewMatcher(labels.MatchRegexp, "j", "foo|bar") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") @@ -186,13 +186,13 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") i1PostingsBenchSuffix := labels.MustNewMatcher(labels.MatchEqual, "i", "1"+postingsBenchSuffix) iSuffix := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+ddd") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") jNotFoo := 
labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") n1 := labels.MustNewMatcher(labels.MatchEqual, "n", "1"+postingsBenchSuffix) nX := labels.MustNewMatcher(labels.MatchNotEqual, "n", "X"+postingsBenchSuffix) - nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", "^.+$") + nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", ".+") ctx := context.Background() @@ -205,12 +205,12 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { {`i with i="1"`, "i", []*labels.Matcher{i1}}, // i has 100k values. {`i with n="1"`, "i", []*labels.Matcher{n1}}, - {`i with n="^.+$"`, "i", []*labels.Matcher{nPlus}}, + {`i with n=".+"`, "i", []*labels.Matcher{nPlus}}, {`i with n="1",j!="foo"`, "i", []*labels.Matcher{n1, jNotFoo}}, {`i with n="1",j=~"X.+"`, "i", []*labels.Matcher{n1, jXplus}}, {`i with n="1",j=~"XXX|YYY"`, "i", []*labels.Matcher{n1, jXXXYYY}}, {`i with n="X",j!="foo"`, "i", []*labels.Matcher{nX, jNotFoo}}, - {`i with n="1",i=~"^.*$",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, + {`i with n="1",i=~".*",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, // matchers on i itself {`i with i="1aaa...ddd"`, "i", []*labels.Matcher{i1PostingsBenchSuffix}}, {`i with i=~"1.+"`, "i", []*labels.Matcher{i1Plus}}, diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 50525f65f4..aca6c845b1 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2689,6 +2689,7 @@ func TestPostingsForMatchers(t *testing.T) { app.Append(0, labels.FromStrings("n", "1"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "a"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "b"), 0, 0) + app.Append(0, labels.FromStrings("n", "1", "i", "\n"), 0, 0) app.Append(0, labels.FromStrings("n", "2"), 0, 0) app.Append(0, labels.FromStrings("n", "2.5"), 0, 0) require.NoError(t, app.Commit()) @@ -2704,6 +2705,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2722,6 +2724,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), labels.FromStrings("n", "2.5"), }, @@ -2739,6 +2742,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2750,6 +2754,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2757,6 +2762,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Regex. 
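The TestPostingsForMatchers cases being extended here exercise the new shortcut in PostingsForMatchers (the querier.go hunk further up): i=~".*" matches every string, including the empty one, so the matcher is simply dropped, while i!~".*" can never match and the whole selection short-circuits to empty postings, with no index lookups in either case. The new "\n" label value exists because Prometheus compiles matchers fully anchored and, in current versions, with the s flag, so .* genuinely matches values containing newlines. A small sketch of the classification, plus a sanity check of that anchoring assumption:

```go
package main

import (
	"fmt"
	"regexp"
)

// Matcher is a pared-down stand-in for labels.Matcher, just enough to show
// the tautology shortcuts; Negate models the !~ matcher type.
type Matcher struct {
	Name, Value string
	Negate      bool
}

// selectivity classifies a regexp matcher before any index work happens:
// =~".*" filters nothing, !~".*" matches nothing.
func selectivity(m Matcher) string {
	if m.Value == ".*" {
		if m.Negate {
			return "empty result; skip the index entirely"
		}
		return "matches everything; drop the matcher"
	}
	return "evaluate against the index"
}

func main() {
	fmt.Println(selectivity(Matcher{Name: "i", Value: ".*"}))
	fmt.Println(selectivity(Matcher{Name: "i", Value: ".*", Negate: true}))
	fmt.Println(selectivity(Matcher{Name: "i", Value: ".*1.*"}))

	// Sanity check of the premise: with full anchoring and the s flag,
	// .* matches any value, newlines included -- hence the new "\n"
	// label value in the test fixtures.
	re := regexp.MustCompile(`^(?s:.*)$`)
	fmt.Println(re.MatchString("a\nb")) // true
}
```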
@@ -2766,6 +2772,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2801,6 +2808,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2808,6 +2816,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Not regex. @@ -2816,6 +2825,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2849,12 +2859,14 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a?$")}, exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2862,6 +2874,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2895,6 +2908,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), }, }, @@ -2942,6 +2956,57 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "2.5"), }, }, + // Test shortcut for i=~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~".*" and i=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchEqual, "i", "a")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + // Test shortcut for i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut for n!~"^.*$", i!~".*". First one triggers empty result. 
+ { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*$")}, + exp: []labels.Labels{}, + }, } ir, err := h.Index() @@ -3170,7 +3235,7 @@ func BenchmarkQueries(b *testing.B) { qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples) require.NoError(b, err) isoState := head.oooIso.TrackReadAfter(0) - qOOOHead := NewHeadAndOOOQuerier(1, nSamples, head, isoState, qHead) + qOOOHead := NewHeadAndOOOQuerier(1, 1, nSamples, head, isoState, qHead) queryTypes = append(queryTypes, qt{ fmt.Sprintf("_Head_oooPercent:%d", oooPercentage), qOOOHead, @@ -3235,7 +3300,7 @@ func (m mockMatcherIndex) LabelValueFor(context.Context, storage.SeriesRef, stri } func (m mockMatcherIndex) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) { - return nil, errors.New("label names for for called") + return nil, errors.New("label names for called") } func (m mockMatcherIndex) Postings(context.Context, string, ...string) (index.Postings, error) { @@ -3722,3 +3787,35 @@ func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[ func (m mockReaderOfLabels) Symbols() index.StringIter { panic("Series called") } + +// TestMergeQuerierConcurrentSelectMatchers reproduces the data race bug from +// https://github.com/prometheus/prometheus/issues/14723, when one of the queriers (blockQuerier in this case) +// alters the passed matchers. +func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { + block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil) + require.NoError(t, err) + defer func() { + require.NoError(t, block.Close()) + }() + p, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + // A secondary querier is required to enable concurrent select; a blockQuerier is used for simplicity. + s, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + originalMatchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "baz", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), + } + matchers := append([]*labels.Matcher{}, originalMatchers...) + + mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) + defer func() { + require.NoError(t, mergedQuerier.Close()) + }() + + mergedQuerier.Select(context.Background(), false, nil, matchers...) + + require.Equal(t, originalMatchers, matchers) +} diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index da7748e187..f3a657aecb 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -166,7 +166,7 @@ func TestRecord_EncodeDecode(t *testing.T) { require.NoError(t, err) require.Equal(t, floatHistograms, decFloatHistograms) - // Gauge ingeger histograms. + // Gauge integer histograms. 
for i := range histograms { histograms[i].H.CounterResetHint = histogram.GaugeType } diff --git a/tsdb/repair.go b/tsdb/repair.go index 9d2c5738d1..8bdc645b5e 100644 --- a/tsdb/repair.go +++ b/tsdb/repair.go @@ -17,19 +17,17 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/fileutil" ) // repairBadIndexVersion repairs an issue in index and meta.json persistence introduced in // commit 129773b41a565fde5156301e37f9a87158030443. -func repairBadIndexVersion(logger log.Logger, dir string) error { +func repairBadIndexVersion(logger *slog.Logger, dir string) error { // All blocks written by Prometheus 2.1 with a meta.json version of 2 are affected. // We must actually set the index file version to 2 and revert the meta.json version back to 1. dirs, err := blockDirs(dir) @@ -41,7 +39,7 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { defer func() { for _, tmp := range tmpFiles { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } } }() @@ -49,20 +47,20 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { for _, d := range dirs { meta, err := readBogusMetaFile(d) if err != nil { - level.Error(logger).Log("msg", "failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) + logger.Error("failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) continue } if meta.Version == metaVersion1 { - level.Info(logger).Log( - "msg", "Found healthy block", + logger.Info( + "Found healthy block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, ) continue } - level.Info(logger).Log( - "msg", "Fixing broken block", + logger.Info( + "Fixing broken block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, diff --git a/tsdb/testutil.go b/tsdb/testutil.go index 9730e47132..03587f4e2c 100644 --- a/tsdb/testutil.go +++ b/tsdb/testutil.go @@ -16,6 +16,8 @@ package tsdb import ( "testing" + "github.com/prometheus/prometheus/tsdb/tsdbutil" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" @@ -27,7 +29,11 @@ import ( ) const ( - float = "float" + float = "float" + intHistogram = "integer histogram" + floatHistogram = "float histogram" + gaugeIntHistogram = "gauge int histogram" + gaugeFloatHistogram = "gauge float histogram" ) type testValue struct { @@ -42,7 +48,6 @@ type sampleTypeScenario struct { sampleFunc func(ts, value int64) sample } -// TODO: native histogram sample types will be added as part of out-of-order native histogram support; see #11220. 
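The repair.go hunk above is one instance of a mechanical migration that runs through the rest of this diff: go-kit's level.Info(logger).Log("msg", ...) calls become methods on *slog.Logger, and log.NewNopLogger() in tests becomes promslog.NewNopLogger(). The message moves from the "msg" key to a positional argument, while the trailing key/value pairs keep their shape. A minimal sketch of the new call shape; repairExample is illustrative, not code from the diff:

```go
package main

import (
	"log/slog"
	"os"
)

// repairExample shows the call shape after the go-kit -> slog migration.
func repairExample(logger *slog.Logger, dir string) {
	// Before: level.Info(logger).Log("msg", "Fixing broken block", "dir", dir)
	// After: the level is a method, the message is positional, and the
	// key/value pairs keep their shape.
	logger.Info("Fixing broken block", "dir", dir)
	logger.Error("remove tmp file", "err", os.ErrPermission.Error())
}

func main() {
	repairExample(slog.New(slog.NewTextHandler(os.Stderr, nil)), "/tmp/blocks")
}
```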
var sampleTypeScenarios = map[string]sampleTypeScenario{ float: { sampleType: sampleMetricTypeFloat, @@ -55,50 +60,50 @@ var sampleTypeScenarios = map[string]sampleTypeScenario{ return sample{t: ts, f: float64(value)} }, }, - // intHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} - // }, - // }, - // floatHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} - // }, - // }, - // gaugeIntHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} - // }, - // }, - // gaugeFloatHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} - // }, - // }, + intHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} + }, + }, + floatHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} + }, + }, + gaugeIntHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) + return ref, s, err + }, + sampleFunc: 
func(ts, value int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} + }, + }, + gaugeFloatHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} + }, + }, } // requireEqualSeries checks that the actual series are equal to the expected ones. It ignores the counter reset hints for histograms. @@ -106,7 +111,7 @@ func requireEqualSeries(t *testing.T, expected, actual map[string][]chunks.Sampl for name, expectedItem := range expected { actualItem, ok := actual[name] require.True(t, ok, "Expected series %s not found", name) - requireEqualSamples(t, name, expectedItem, actualItem, ignoreCounterResets) + requireEqualSamples(t, name, expectedItem, actualItem, requireEqualSamplesIgnoreCounterResets) } for name := range actual { _, ok := expected[name] @@ -121,7 +126,28 @@ func requireEqualOOOSamples(t *testing.T, expectedSamples int, db *DB) { "number of ooo appended samples mismatch") } -func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, ignoreCounterResets bool) { +type requireEqualSamplesOption int + +const ( + requireEqualSamplesNoOption requireEqualSamplesOption = iota + requireEqualSamplesIgnoreCounterResets + requireEqualSamplesInUseBucketCompare +) + +func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, options ...requireEqualSamplesOption) { + var ( + ignoreCounterResets bool + inUseBucketCompare bool + ) + for _, option := range options { + switch option { + case requireEqualSamplesIgnoreCounterResets: + ignoreCounterResets = true + case requireEqualSamplesInUseBucketCompare: + inUseBucketCompare = true + } + } + require.Equal(t, len(expected), len(actual), "Length not equal to expected for %s", name) for i, s := range expected { expectedSample := s @@ -139,6 +165,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.H().Compact(0) + actualSample.H().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } case s.FH() != nil: @@ -151,6 +181,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.FH().Compact(0) + actualSample.FH().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } default: diff --git a/tsdb/tombstones/tombstones.go b/tsdb/tombstones/tombstones.go index 4cea5005db..dcba298f3b 
100644 --- a/tsdb/tombstones/tombstones.go +++ b/tsdb/tombstones/tombstones.go @@ -19,15 +19,13 @@ import ( "fmt" "hash" "hash/crc32" + "log/slog" "math" "os" "path/filepath" "sort" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/encoding" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -76,7 +74,7 @@ type Reader interface { Close() error } -func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { +func WriteFile(logger *slog.Logger, dir string, tr Reader) (int64, error) { path := filepath.Join(dir, TombstonesFilename) tmp := path + ".tmp" hash := newCRC32() @@ -89,11 +87,11 @@ func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { defer func() { if f != nil { if err := f.Close(); err != nil { - level.Error(logger).Log("msg", "close tmp file", "err", err.Error()) + logger.Error("close tmp file", "err", err.Error()) } } if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() diff --git a/tsdb/tombstones/tombstones_test.go b/tsdb/tombstones/tombstones_test.go index 36c9f1c1e3..cbf686e4bb 100644 --- a/tsdb/tombstones/tombstones_test.go +++ b/tsdb/tombstones/tombstones_test.go @@ -20,10 +20,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" "go.uber.org/goleak" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/storage" ) @@ -50,7 +51,7 @@ func TestWriteAndReadbackTombstones(t *testing.T) { stones.AddInterval(storage.SeriesRef(ref), dranges...) } - _, err := WriteFile(log.NewNopLogger(), tmpdir, stones) + _, err := WriteFile(promslog.NewNopLogger(), tmpdir, stones) require.NoError(t, err) restr, _, err := ReadTombstones(tmpdir) diff --git a/tsdb/tsdbblockutil.go b/tsdb/tsdbblockutil.go index f7b27c2e08..b49757223f 100644 --- a/tsdb/tsdbblockutil.go +++ b/tsdb/tsdbblockutil.go @@ -16,10 +16,9 @@ package tsdb import ( "context" "fmt" + "log/slog" "path/filepath" - "github.com/go-kit/log" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" ) @@ -27,7 +26,7 @@ import ( var ErrInvalidTimes = fmt.Errorf("max time is lesser than min time") // CreateBlock creates a chunkrange block from the samples passed to it, and writes it to disk. 
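Before the CreateBlock hunk just below, note the structure WriteFile (above) preserves: all writes go to path+".tmp", and one deferred function both closes the handle, if it is still open, and removes the temporary file, so a failure at any point leaves no partial tombstones file behind; only a final rename publishes the result. CreateBlock itself also picks up a small logging fix below: the old go-kit call passed its message in the key position (logger.Log("err closing blockwriter", err)), which the slog form corrects. A generic, runnable sketch of the tmp-file pattern; writeAtomically is a hypothetical name:

```go
package main

import (
	"log/slog"
	"os"
	"path/filepath"
)

// writeAtomically distills the tmp-file pattern in tombstones.WriteFile:
// writes target a sibling .tmp file, a deferred cleanup closes and deletes
// it on any failure, and only a final rename publishes the result.
func writeAtomically(path string, data []byte, logger *slog.Logger) (err error) {
	tmp := path + ".tmp"
	f, err := os.Create(tmp)
	if err != nil {
		return err
	}
	defer func() {
		if f != nil {
			if cerr := f.Close(); cerr != nil {
				logger.Error("close tmp file", "err", cerr.Error())
			}
		}
		if rerr := os.RemoveAll(tmp); rerr != nil { // no-op once renamed
			logger.Error("remove tmp file", "err", rerr.Error())
		}
	}()
	if _, err = f.Write(data); err != nil {
		return err
	}
	if err = f.Close(); err != nil {
		return err
	}
	f = nil // already closed; stop the deferred cleanup from closing again
	return os.Rename(tmp, path)
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	path := filepath.Join(os.TempDir(), "tombstones-example")
	if err := writeAtomically(path, []byte("payload"), logger); err != nil {
		logger.Error("write failed", "err", err)
	}
}
```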
-func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger log.Logger) (string, error) { +func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger *slog.Logger) (string, error) { if chunkRange == 0 { chunkRange = DefaultBlockDuration } @@ -41,7 +40,7 @@ func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger l } defer func() { if err := w.Close(); err != nil { - logger.Log("err closing blockwriter", err.Error()) + logger.Error("err closing blockwriter", "err", err.Error()) } }() diff --git a/tsdb/tsdbutil/dir_locker.go b/tsdb/tsdbutil/dir_locker.go index fa939879ca..4b69e1f9d6 100644 --- a/tsdb/tsdbutil/dir_locker.go +++ b/tsdb/tsdbutil/dir_locker.go @@ -16,11 +16,10 @@ package tsdbutil import ( "errors" "fmt" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -34,7 +33,7 @@ const ( ) type DirLocker struct { - logger log.Logger + logger *slog.Logger createdCleanly prometheus.Gauge @@ -43,7 +42,7 @@ type DirLocker struct { } // NewDirLocker creates a DirLocker that can obtain an exclusive lock on dir. -func NewDirLocker(dir, subsystem string, l log.Logger, r prometheus.Registerer) (*DirLocker, error) { +func NewDirLocker(dir, subsystem string, l *slog.Logger, r prometheus.Registerer) (*DirLocker, error) { lock := &DirLocker{ logger: l, createdCleanly: prometheus.NewGauge(prometheus.GaugeOpts{ @@ -74,7 +73,7 @@ func (l *DirLocker) Lock() error { } if _, err := os.Stat(l.path); err == nil { - level.Warn(l.logger).Log("msg", "A lockfile from a previous execution already existed. It was replaced", "file", l.path) + l.logger.Warn("A lockfile from a previous execution already existed. 
It was replaced", "file", l.path) l.createdCleanly.Set(lockfileReplaced) } else { diff --git a/tsdb/tsdbutil/dir_locker_test.go b/tsdb/tsdbutil/dir_locker_test.go index fc7d905b2d..65e2761692 100644 --- a/tsdb/tsdbutil/dir_locker_test.go +++ b/tsdb/tsdbutil/dir_locker_test.go @@ -16,15 +16,16 @@ package tsdbutil import ( "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/util/testutil" ) func TestLockfile(t *testing.T) { TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*DirLocker, testutil.Closer) { - locker, err := NewDirLocker(data, "tsdbutil", log.NewNopLogger(), nil) + locker, err := NewDirLocker(data, "tsdbutil", promslog.NewNopLogger(), nil) require.NoError(t, err) if createLock { diff --git a/tsdb/tsdbutil/dir_locker_testutil.go b/tsdb/tsdbutil/dir_locker_testutil.go index a4cf5abd68..7228dbafed 100644 --- a/tsdb/tsdbutil/dir_locker_testutil.go +++ b/tsdb/tsdbutil/dir_locker_testutil.go @@ -18,8 +18,8 @@ import ( "os" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/util/testutil" @@ -68,7 +68,7 @@ func TestDirLockerUsage(t *testing.T, open func(t *testing.T, data string, creat // Test preconditions (file already exists + lockfile option) if c.fileAlreadyExists { - tmpLocker, err := NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + tmpLocker, err := NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) err = os.WriteFile(tmpLocker.path, []byte{}, 0o644) require.NoError(t, err) diff --git a/tsdb/wlog/checkpoint.go b/tsdb/wlog/checkpoint.go index a16cd5fc74..58e11c770e 100644 --- a/tsdb/wlog/checkpoint.go +++ b/tsdb/wlog/checkpoint.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -25,9 +26,6 @@ import ( "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -94,11 +92,11 @@ const checkpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. 
-func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser - level.Info(logger).Log("msg", "Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) + logger.Info("Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) { var sgmRange []SegmentRange diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index a9786454de..8ee193f5ac 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -23,9 +23,10 @@ import ( "strings" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" @@ -244,7 +245,7 @@ func TestCheckpoint(t *testing.T) { } require.NoError(t, w.Close()) - stats, err := Checkpoint(log.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { + stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { return x%2 == 0 }, last/2) require.NoError(t, err) @@ -354,7 +355,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { require.NoError(t, f.Close()) // Run the checkpoint and since the wlog contains corrupt data this should return an error. - _, err = Checkpoint(log.NewNopLogger(), w, 0, 1, nil, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0) require.Error(t, err) // Walk the wlog dir to make sure there are no tmp folder left behind after the error. diff --git a/tsdb/wlog/live_reader.go b/tsdb/wlog/live_reader.go index 6eaef5f396..a017d362d1 100644 --- a/tsdb/wlog/live_reader.go +++ b/tsdb/wlog/live_reader.go @@ -20,9 +20,8 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" @@ -51,7 +50,7 @@ func NewLiveReaderMetrics(reg prometheus.Registerer) *LiveReaderMetrics { } // NewLiveReader returns a new live reader. -func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { +func NewLiveReader(logger *slog.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { // Calling zstd.NewReader with a nil io.Reader and no options cannot return an error. zstdReader, _ := zstd.NewReader(nil) @@ -73,7 +72,7 @@ func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) * // that are still in the process of being written, and returns records as soon // as they can be read. 
type LiveReader struct { - logger log.Logger + logger *slog.Logger rdr io.Reader err error rec []byte @@ -311,7 +310,7 @@ func (r *LiveReader) readRecord() ([]byte, int, error) { return nil, 0, fmt.Errorf("record would overflow current page: %d > %d", r.readIndex+recordHeaderSize+length, pageSize) } r.metrics.readerCorruptionErrors.WithLabelValues("record_span_page").Inc() - level.Warn(r.logger).Log("msg", "Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) + r.logger.Warn("Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) } if recordHeaderSize+length > pageSize { return nil, 0, fmt.Errorf("record length greater than a single page: %d > %d", recordHeaderSize+length, pageSize) diff --git a/tsdb/wlog/reader_test.go b/tsdb/wlog/reader_test.go index 484eff3664..2ac63cbf15 100644 --- a/tsdb/wlog/reader_test.go +++ b/tsdb/wlog/reader_test.go @@ -29,11 +29,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" - "github.com/prometheus/prometheus/util/testutil" ) type reader interface { @@ -53,7 +53,7 @@ var readerConstructors = map[string]func(io.Reader) reader{ return NewReader(r) }, "LiveReader": func(r io.Reader) reader { - lr := NewLiveReader(log.NewNopLogger(), NewLiveReaderMetrics(nil), r) + lr := NewLiveReader(promslog.NewNopLogger(), NewLiveReaderMetrics(nil), r) lr.eofNonErr = true return lr }, @@ -196,7 +196,7 @@ func TestReader(t *testing.T) { } func TestReader_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for i := range testReaderCases { t.Run(strconv.Itoa(i), func(t *testing.T) { @@ -353,7 +353,7 @@ func TestReaderFuzz(t *testing.T) { } func TestReaderFuzz_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { dir := t.TempDir() @@ -441,7 +441,7 @@ func TestReaderFuzz_Live(t *testing.T) { func TestLiveReaderCorrupt_ShortFile(t *testing.T) { // Write a corrupt WAL segment, there is one record of pageSize in length, // but the segment is only half written. - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize, CompressionNone) @@ -481,7 +481,7 @@ func TestLiveReaderCorrupt_ShortFile(t *testing.T) { func TestLiveReaderCorrupt_RecordTooLongAndShort(t *testing.T) { // Write a corrupt WAL segment, when record len > page size. 
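The LiveReader hunk above only changes how a violation is logged; the two page-math invariants stay as they were: a record fragment must fit within the page it starts in, and no fragment may exceed a single page at all. Restated as a standalone check, using pageSize and recordHeaderSize values as defined in tsdb/wlog at the time of writing; simplified in that the real reader can treat a spanning record as recoverable corruption (a metric increment plus a warning) rather than always failing:

```go
package main

import "fmt"

const (
	pageSize         = 32 * 1024 // wlog page size
	recordHeaderSize = 7         // type byte, 2-byte length, 4-byte CRC
)

// validateRecord restates the invariants LiveReader.readRecord enforces.
func validateRecord(readIndex, length int) error {
	if recordHeaderSize+length > pageSize {
		return fmt.Errorf("record length greater than a single page: %d > %d", recordHeaderSize+length, pageSize)
	}
	if readIndex+recordHeaderSize+length > pageSize {
		return fmt.Errorf("record would overflow current page: %d > %d", readIndex+recordHeaderSize+length, pageSize)
	}
	return nil
}

func main() {
	fmt.Println(validateRecord(0, 1024))            // nil: fits comfortably
	fmt.Println(validateRecord(pageSize-100, 1024)) // spans the page boundary
	fmt.Println(validateRecord(0, pageSize))        // too long for any page
}
```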
- logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize*2, CompressionNone) diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index ac5041e87b..d68ef2accb 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -24,9 +25,8 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -84,7 +84,7 @@ type WatcherMetrics struct { type Watcher struct { name string writer WriteTo - logger log.Logger + logger *slog.Logger walDir string lastCheckpoint string sendExemplars bool @@ -172,9 +172,9 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics { } // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger log.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { +func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger *slog.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Watcher{ logger: logger, @@ -222,7 +222,7 @@ func (w *Watcher) setMetrics() { // Start the Watcher. func (w *Watcher) Start() { w.setMetrics() - level.Info(w.logger).Log("msg", "Starting WAL watcher", "queue", w.name) + w.logger.Info("Starting WAL watcher", "queue", w.name) go w.loop() } @@ -241,7 +241,7 @@ func (w *Watcher) Stop() { w.metrics.currentSegment.DeleteLabelValues(w.name) } - level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) + w.logger.Info("WAL watcher stopped", "queue", w.name) } func (w *Watcher) loop() { @@ -251,7 +251,7 @@ func (w *Watcher) loop() { for !isClosed(w.quit) { w.SetStartTime(time.Now()) if err := w.Run(); err != nil { - level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) + w.logger.Error("error tailing WAL", "err", err) } select { @@ -274,7 +274,7 @@ func (w *Watcher) Run() error { // Run will be called again if there was a failure to read the WAL. w.sendSamples = false - level.Info(w.logger).Log("msg", "Replaying WAL", "queue", w.name) + w.logger.Info("Replaying WAL", "queue", w.name) // Backfill from the checkpoint first if it exists. lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir) @@ -294,13 +294,13 @@ func (w *Watcher) Run() error { return err } - level.Debug(w.logger).Log("msg", "Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) + w.logger.Debug("Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) for !isClosed(w.quit) { w.currentSegmentMetric.Set(float64(currentSegment)) // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. 
- level.Debug(w.logger).Log("msg", "Processing segment", "currentSegment", currentSegment) + w.logger.Debug("Processing segment", "currentSegment", currentSegment) if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil && !errors.Is(err, ErrIgnorable) { return err } @@ -338,9 +338,9 @@ func (w *Watcher) readAndHandleError(r *LiveReader, segmentNum int, tail bool, s // Ignore all errors reading to end of segment whilst replaying the WAL. if !tail { if err != nil && !errors.Is(err, io.EOF) { - level.Warn(w.logger).Log("msg", "Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) + w.logger.Warn("Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) } else if r.Offset() != size { - level.Warn(w.logger).Log("msg", "Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) + w.logger.Warn("Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) } return ErrIgnorable } @@ -403,7 +403,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { <-gcSem }() if err := w.garbageCollectSeries(segmentNum); err != nil { - level.Warn(w.logger).Log("msg", "Error process checkpoint", "err", err) + w.logger.Warn("Error processing checkpoint", "err", err) } }() default: @@ -424,7 +424,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { // we haven't read due to a notification in quite some time, try reading anyways case <-readTicker.C: - level.Debug(w.logger).Log("msg", "Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) + w.logger.Debug("Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) err := w.readAndHandleError(reader, segmentNum, tail, size) if err != nil { return err @@ -460,11 +460,11 @@ func (w *Watcher) garbageCollectSeries(segmentNum int) error { } if index >= segmentNum { - level.Debug(w.logger).Log("msg", "Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) + w.logger.Debug("Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) return nil } - level.Debug(w.logger).Log("msg", "New checkpoint detected", "new", dir, "currentSegment", segmentNum) + w.logger.Debug("New checkpoint detected", "new", dir, "currentSegment", segmentNum) if err = w.readCheckpoint(dir, (*Watcher).readSegmentForGC); err != nil { return fmt.Errorf("readCheckpoint: %w", err) @@ -519,7 +519,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } samplesToSend = append(samplesToSend, s) } @@ -564,7 +564,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } histogramsToSend = append(histogramsToSend, h) } @@ -592,7 +592,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool)
error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } floatHistogramsToSend = append(floatHistogramsToSend, fh) } @@ -670,7 +670,7 @@ type segmentReadFn func(w *Watcher, r *LiveReader, segmentNum int, tail bool) er // Read all the series records from a Checkpoint directory. func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) error { - level.Debug(w.logger).Log("msg", "Reading checkpoint", "dir", checkpointDir) + w.logger.Debug("Reading checkpoint", "dir", checkpointDir) index, err := checkpointNum(checkpointDir) if err != nil { return fmt.Errorf("checkpointNum: %w", err) @@ -704,7 +704,7 @@ func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) err } } - level.Debug(w.logger).Log("msg", "Read series references from checkpoint", "checkpoint", checkpointDir) + w.logger.Debug("Read series references from checkpoint", "checkpoint", checkpointDir) return nil } diff --git a/tsdb/wlog/watcher_test.go b/tsdb/wlog/watcher_test.go index dc0314e8c9..68c2c5afda 100644 --- a/tsdb/wlog/watcher_test.go +++ b/tsdb/wlog/watcher_test.go @@ -22,9 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -52,6 +53,13 @@ func retry(t *testing.T, interval time.Duration, n int, f func() bool) { t.Logf("function returned false") } +// Overwrite readTimeout defined in watcher.go. +func overwriteReadTimeout(t *testing.T, val time.Duration) { + initialVal := readTimeout + readTimeout = val + t.Cleanup(func() { readTimeout = initialVal }) +} + type writeToMock struct { samplesAppended int exemplarsAppended int @@ -302,7 +310,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { } } require.NoError(t, w.Log(recs...)) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) _, _, err = Segments(w.Dir()) require.NoError(t, err) @@ -367,7 +375,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { } } - Checkpoint(log.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) + Checkpoint(promslog.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) w.Truncate(1) // Write more records after checkpointing. 
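The `overwriteReadTimeout` helper introduced above replaces bare `readTimeout = time.Second` assignments, so the package-level timeout is restored when each test finishes instead of leaking into later tests. The general shape of that pattern, as a standalone sketch (the names here are illustrative, not part of the patch):

```go
package example

import (
	"testing"
	"time"
)

// Package-level knob, standing in for wlog's readTimeout.
var readTimeout = 15 * time.Second

// overrideDuration mirrors the overwriteReadTimeout helper in this diff:
// set a package global for one test and restore it on cleanup.
func overrideDuration(t *testing.T, val time.Duration) {
	t.Helper()
	initial := readTimeout
	readTimeout = val
	t.Cleanup(func() { readTimeout = initial })
}

func TestWithShortTimeout(t *testing.T) {
	overrideDuration(t, time.Second)
	// ... test body runs with readTimeout == 1s; cleanup restores 15s.
}
```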
@@ -394,7 +402,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) go watcher.Start() @@ -458,7 +466,7 @@ func TestReadCheckpoint(t *testing.T) { } _, err = w.NextSegmentSync() require.NoError(t, err) - _, err = Checkpoint(log.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) require.NoError(t, w.Truncate(32)) @@ -607,7 +615,7 @@ func TestCheckpointSeriesReset(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) watcher.MaxSegment = -1 @@ -621,7 +629,7 @@ func TestCheckpointSeriesReset(t *testing.T) { return wt.checkNumSeries() == seriesCount }, 10*time.Second, 1*time.Second) - _, err = Checkpoint(log.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) err = w.Truncate(5) @@ -742,9 +750,6 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { const seriesCount = 10 const samplesCount = 50 - // This test can take longer than intended to finish in cloud CI. - readTimeout := 10 * time.Second - for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(string(compress), func(t *testing.T) { dir := t.TempDir() @@ -755,36 +760,50 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { w, err := NewSize(nil, nil, wdir, segmentSize, compress) require.NoError(t, err) - var wg sync.WaitGroup - // Generate one segment initially to ensure that watcher.Run() finds at least one segment on disk. + // Write to 00000000, the watcher will read series from it. require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount)) - w.NextSegment() // Force creation of the next segment - wg.Add(1) - go func() { - defer wg.Done() - for i := 1; i < segmentsToWrite; i++ { - require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) - w.NextSegment() - } - }() + // Create 00000001, the watcher will tail it once started. + w.NextSegment() + // Set up the watcher and run it in the background. wt := newWriteToMock(time.Millisecond) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.setMetrics() watcher.MaxSegment = segmentsToRead - watcher.setMetrics() - startTime := time.Now() - err = watcher.Run() - wg.Wait() - require.Less(t, time.Since(startTime), readTimeout) - - // But samples records shouldn't get dropped - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() > 0 + var g errgroup.Group + g.Go(func() error { + startTime := time.Now() + err = watcher.Run() + if err != nil { + return err + } + // If the watcher was to wait for readTicker to read every new segment, it would need readTimeout * segmentsToRead. 
+ d := time.Since(startTime) + if d > readTimeout { + return fmt.Errorf("watcher ran for %s, it shouldn't rely on readTicker=%s to read the new segments", d, readTimeout) + } + return nil }) - require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) - require.NoError(t, err) + // The watcher went through 00000000 and is tailing the next one. + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumSeries() == seriesCount + }) + + // In the meantime, add some new segments in bulk. + // We should end up with segmentsToWrite + 1 segments now. + for i := 1; i < segmentsToWrite; i++ { + require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) + w.NextSegment() + } + + // Wait for the watcher. + require.NoError(t, g.Wait()) + + // All series and samples were read. + require.Equal(t, (segmentsToRead+1)*seriesCount, wt.checkNumSeries()) // Series from 00000000 are also read. + require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) require.NoError(t, w.Close()) }) } diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index b14521f358..54c257d61a 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -21,6 +21,7 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" "os" "path/filepath" "slices" @@ -28,11 +29,10 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/tsdb/fileutil" ) @@ -121,7 +121,7 @@ func (e *CorruptionErr) Unwrap() error { } // OpenWriteSegment opens segment k in dir. The returned segment is ready for new appends. -func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { +func OpenWriteSegment(logger *slog.Logger, dir string, k int) (*Segment, error) { segName := SegmentName(dir, k) f, err := os.OpenFile(segName, os.O_WRONLY|os.O_APPEND, 0o666) if err != nil { @@ -138,7 +138,7 @@ func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { // If it was torn mid-record, a full read (which the caller should do anyway // to ensure integrity) will detect it as a corruption by the end. if d := stat.Size() % pageSize; d != 0 { - level.Warn(logger).Log("msg", "Last page of the wlog is torn, filling it with zeros", "segment", segName) + logger.Warn("Last page of the wlog is torn, filling it with zeros", "segment", segName) if _, err := f.Write(make([]byte, pageSize-d)); err != nil { f.Close() return nil, fmt.Errorf("zero-pad torn page: %w", err) @@ -201,7 +201,7 @@ func ParseCompressionType(compress bool, compressType string) CompressionType { // beyond the most recent segment. type WL struct { dir string - logger log.Logger + logger *slog.Logger segmentSize int mtx sync.RWMutex segment *Segment // Active segment. @@ -286,7 +286,7 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { }, func() float64 { val, err := w.Size() if err != nil { - level.Error(w.logger).Log("msg", "Failed to calculate size of \"wal\" dir", + w.logger.Error("Failed to calculate size of \"wal\" dir", "err", err.Error()) } return float64(val) @@ -309,13 +309,13 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { } // New returns a new WAL over the given directory. 
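The `wlog` constructors now take a `*slog.Logger` as well. A short usage sketch, assuming the signatures as changed in this patch (`New`, `WL.Log`, and `CompressionSnappy` all appear elsewhere in this file's diff):

```go
package main

import (
	"os"

	"github.com/prometheus/common/promslog"
	"github.com/prometheus/prometheus/tsdb/wlog"
)

func main() {
	dir, err := os.MkdirTemp("", "wal-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// New now takes a *slog.Logger; passing nil also works, since the
	// constructor substitutes a nop logger (see NewSize below).
	w, err := wlog.New(promslog.NewNopLogger(), nil, dir, wlog.CompressionSnappy)
	if err != nil {
		panic(err)
	}
	defer w.Close()

	// Log appends one or more raw records to the write log.
	if err := w.Log([]byte("example record")); err != nil {
		panic(err)
	}
}
```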
-func New(logger log.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { +func New(logger *slog.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { return NewSize(logger, reg, dir, DefaultSegmentSize, compress) } // NewSize returns a new write log over the given directory. // New segments are created with the specified size. -func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { +func NewSize(logger *slog.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { if segmentSize%pageSize != 0 { return nil, errors.New("invalid segment size") } @@ -323,7 +323,7 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi return nil, fmt.Errorf("create dir: %w", err) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var zstdWriter *zstd.Encoder @@ -378,9 +378,9 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi } // Open an existing WAL. -func Open(logger log.Logger, dir string) (*WL, error) { +func Open(logger *slog.Logger, dir string) (*WL, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } zstdWriter, err := zstd.NewWriter(nil) if err != nil { @@ -443,7 +443,7 @@ func (w *WL) Repair(origErr error) error { if cerr.Segment < 0 { return errors.New("corruption error does not specify position") } - level.Warn(w.logger).Log("msg", "Starting corruption repair", + w.logger.Warn("Starting corruption repair", "segment", cerr.Segment, "offset", cerr.Offset) // All segments behind the corruption can no longer be used. @@ -451,7 +451,7 @@ func (w *WL) Repair(origErr error) error { if err != nil { return fmt.Errorf("list segments: %w", err) } - level.Warn(w.logger).Log("msg", "Deleting all segments newer than corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Deleting all segments newer than corrupted segment", "segment", cerr.Segment) for _, s := range segs { if w.segment.i == s.index { @@ -473,7 +473,7 @@ func (w *WL) Repair(origErr error) error { // Regardless of the corruption offset, no record reaches into the previous segment. // So we can safely repair the WAL by removing the segment and re-inserting all // its records up to the corruption. - level.Warn(w.logger).Log("msg", "Rewrite corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Rewrite corrupted segment", "segment", cerr.Segment) fn := SegmentName(w.Dir(), cerr.Segment) tmpfn := fn + ".repair" @@ -583,10 +583,10 @@ func (w *WL) nextSegment(async bool) (int, error) { // Don't block further writes by fsyncing the last segment. 
f := func() { if err := w.fsync(prev); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := prev.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } } if async { @@ -890,10 +890,10 @@ func (w *WL) Close() (err error) { <-donec if err = w.fsync(w.segment); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := w.segment.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } w.metrics.Unregister() diff --git a/tsdb/wlog/wlog_test.go b/tsdb/wlog/wlog_test.go index 165d2758f0..d195aaee2f 100644 --- a/tsdb/wlog/wlog_test.go +++ b/tsdb/wlog/wlog_test.go @@ -23,14 +23,13 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "github.com/prometheus/prometheus/tsdb/fileutil" - "github.com/prometheus/prometheus/util/testutil" ) func TestMain(m *testing.M) { @@ -215,7 +214,7 @@ func TestCorruptAndCarryOn(t *testing.T) { dir := t.TempDir() var ( - logger = testutil.NewLogger(t) + logger = promslog.NewNopLogger() segmentSize = pageSize * 3 recordSize = (pageSize / 3) - recordHeaderSize ) @@ -568,7 +567,7 @@ func TestUnregisterMetrics(t *testing.T) { reg := prometheus.NewRegistry() for i := 0; i < 2; i++ { - wl, err := New(log.NewNopLogger(), reg, t.TempDir(), CompressionNone) + wl, err := New(promslog.NewNopLogger(), reg, t.TempDir(), CompressionNone) require.NoError(t, err) require.NoError(t, wl.Close()) } diff --git a/ui-commits b/ui-commits new file mode 100644 index 0000000000..7f34e1f95a --- /dev/null +++ b/ui-commits @@ -0,0 +1,12 @@ +dfec29d8e Fix border color for target pools with one target that is failing +65743bf9b ui: drop template readme +a7c1a951d Add general Mantine overrides CSS file +0757fbbec Make sure that alert element table headers are not wrapped +0180cf31a Factor out common icon and card styles +50af7d589 Fix tree line drawing by using a callback ref +ac01dc903 Explain, vector-to-vector: Do not compute results for set operators +9b0dc68d0 PromQL explain view: Support set operators +57898c792 Refactor and fix time formatting functions, add tests +091fc403c Fiddle with targets table styles to try and improve things a bit +a1908df92 Don't wrap action buttons below metric name in metrics explorer +ac5377873 mantine UI: Distinguish between Not Ready and Stopping diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index b0272b7fee..ebe74ecd11 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -146,6 +146,7 @@ var ( PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count/_bucket:", PromQLInfo) HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo) + IncompatibleTypesInBinOpInfo = fmt.Errorf("%w: incompatible sample types encountered for binary operator", PromQLInfo) ) type 
annoErr struct { @@ -273,3 +274,12 @@ func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange. Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName), } } + +// NewIncompatibleTypesInBinOpInfo is used if binary operators act on a +// combination of types that doesn't work and therefore returns no result. +func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posrange.PositionRange) error { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q: %s %s %s", IncompatibleTypesInBinOpInfo, operator, lhsType, operator, rhsType), + } +} diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go new file mode 100644 index 0000000000..5e08422aa0 --- /dev/null +++ b/util/convertnhcb/convertnhcb.go @@ -0,0 +1,173 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "fmt" + "math" + "sort" + "strings" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" +) + +// TempHistogram is used to collect information about classic histogram +// samples incrementally before creating a histogram.Histogram or +// histogram.FloatHistogram based on the values collected. +type TempHistogram struct { + BucketCounts map[float64]float64 + Count float64 + Sum float64 + HasFloat bool +} + +// NewTempHistogram creates a new TempHistogram to +// collect information about classic histogram samples. +func NewTempHistogram() TempHistogram { + return TempHistogram{ + BucketCounts: map[float64]float64{}, + } +} + +func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) { + bucketCounts := map[float64]int64{} + for le, count := range h.BucketCounts { + intCount := int64(math.Round(count)) + if float64(intCount) != count { + return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le) + } + bucketCounts[le] = intCount + } + return bucketCounts, nil +} + +// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native +// histogram with custom buckets based on the provided upper bounds. +// Everything is set except the bucket counts. +// The sorted upper bounds are also returned. 
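Taken together, the new `convertnhcb` helpers form a small pipeline: collect classic-histogram samples into a `TempHistogram`, build a custom-bucket base histogram from the upper bounds with the function below, then fill in the counts with the `NewHistogram` helper defined further down. A hedged sketch of that flow (the `ToFloat` call used to build the float base is an assumption; callers may construct `fhBase` differently):

```go
package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/util/convertnhcb"
)

func main() {
	// Collect classic histogram series (_bucket, _count, _sum) incrementally.
	th := convertnhcb.NewTempHistogram()
	th.BucketCounts[0.1] = 2         // le="0.1"
	th.BucketCounts[1] = 5           // le="1"
	th.BucketCounts[math.Inf(1)] = 7 // le="+Inf"
	th.Count = 7
	th.Sum = 3.9

	// Sort/dedup the upper bounds and prepare the custom-bucket base.
	upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(
		[]float64{0.1, 1, math.Inf(1)}, true)
	fhBase := hBase.ToFloat(nil) // assumed way to derive the float base

	// Fill in the bucket counts; the float variant is returned instead
	// when the collected counts are not integral.
	h, fh := convertnhcb.NewHistogram(th, upperBounds, hBase, fhBase)
	if h != nil {
		fmt.Println("integer NHCB:", h)
	} else {
		fmt.Println("float NHCB:", fh)
	}
}
```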
+func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) { + sort.Float64s(upperBounds0) + var upperBounds []float64 + if needsDedup { + upperBounds = make([]float64, 0, len(upperBounds0)) + prevLE := math.Inf(-1) + for _, le := range upperBounds0 { + if le != prevLE { + upperBounds = append(upperBounds, le) + prevLE = le + } + } + } else { + upperBounds = upperBounds0 + } + var customBounds []float64 + if upperBounds[len(upperBounds)-1] == math.Inf(1) { + customBounds = upperBounds[:len(upperBounds)-1] + } else { + customBounds = upperBounds + } + return upperBounds, &histogram.Histogram{ + Count: 0, + Sum: 0, + Schema: histogram.CustomBucketsSchema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: uint32(len(upperBounds))}, + }, + PositiveBuckets: make([]int64, len(upperBounds)), + CustomValues: customBounds, + } +} + +// NewHistogram fills the bucket counts in the provided histogram.Histogram +// or histogram.FloatHistogram based on the provided temporary histogram and +// upper bounds. +func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) { + intBucketCounts, err := histogram.getIntBucketCounts() + if err != nil { + return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase) + } + return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil +} + +func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram { + h := hBase.Copy() + absBucketCounts := make([]int64, len(h.PositiveBuckets)) + var prevCount, total int64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + absBucketCounts[i] = count + total += count + prevCount = currCount + } + h.PositiveBuckets[0] = absBucketCounts[0] + for i := 1; i < len(h.PositiveBuckets); i++ { + h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1] + } + h.Sum = histogram.Sum + if histogram.Count != 0 { + total = int64(histogram.Count) + } + h.Count = uint64(total) + return h.Compact(0) +} + +func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { + fh := fhBase.Copy() + var prevCount, total float64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + fh.PositiveBuckets[i] = count + total += count + prevCount = currCount + } + fh.Sum = histogram.Sum + if histogram.Count != 0 { + total = histogram.Count + } + fh.Count = total + return fh.Compact(0) +} + +func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { + mName := m.Get(labels.MetricName) + return labels.NewBuilder(m). + Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). + Del(labels.BucketLabel). + Labels() +} + +// GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from +// the metric name. We specifically do not remove the _created suffix as that +// should be removed by the caller. 
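Usage of the two suffix helpers, `GetHistogramMetricBase` above and `GetHistogramMetricBaseName` just below, looks like this (a small sketch; the metric name is illustrative):

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/util/convertnhcb"
)

func main() {
	m := labels.FromStrings(labels.MetricName, "http_request_duration_seconds_bucket", "le", "0.5")

	// Drops the "le" label and trims the given suffix from __name__.
	fmt.Println(convertnhcb.GetHistogramMetricBase(m, "_bucket"))

	// Trims whichever of _bucket/_sum/_count is present.
	fmt.Println(convertnhcb.GetHistogramMetricBaseName("http_request_duration_seconds_count"))
	// -> http_request_duration_seconds
}
```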
+func GetHistogramMetricBaseName(s string) string { + if r, ok := strings.CutSuffix(s, "_bucket"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_sum"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_count"); ok { + return r + } + return s +} diff --git a/util/fmtutil/format.go b/util/fmtutil/format.go index 9034a90fa7..a10908bb8c 100644 --- a/util/fmtutil/format.go +++ b/util/fmtutil/format.go @@ -113,7 +113,7 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, labels, timestamp, m.GetCounter().GetValue()) case m.Summary != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first quantile labels timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first quantile labels timeseries then sum suffix timeseries and finally count suffix timeseries // Add Summary quantile timeseries quantileLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -125,16 +125,16 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, quantileLabels, timestamp, q.GetValue()) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Summary sum timeserie + // Add Summary sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetSummary().GetSampleSum()) - // Add Summary count timeserie + // Add Summary count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetSummary().GetSampleCount())) case m.Histogram != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first bucket suffix timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first bucket suffix timeseries then sum suffix timeseries and finally count suffix timeseries // Add Histogram bucket timeseries bucketLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -146,10 +146,10 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, bucketLabels, timestamp, float64(b.GetCumulativeCount())) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Histogram sum timeserie + // Add Histogram sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetHistogram().GetSampleSum()) - // Add Histogram count timeserie + // Add Histogram count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetHistogram().GetSampleCount())) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index d490a6afdf..d5aee5c095 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -14,12 +14,10 @@ package logging import ( - "bytes" + "context" + "log/slog" "sync" "time" - - "github.com/go-kit/log" - "github.com/go-logfmt/logfmt" ) const ( @@ -28,22 +26,9 @@ const ( maxEntries = 1024 ) -type logfmtEncoder struct { - *logfmt.Encoder - buf bytes.Buffer -} - -var logfmtEncoderPool = sync.Pool{ - New: func() interface{} { - var enc logfmtEncoder - enc.Encoder = logfmt.NewEncoder(&enc.buf) - return &enc - }, -} - -// Deduper implement log.Logger, dedupes log lines. +// Deduper implements *slog.Handler, dedupes log lines based on a time duration. 
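With the `Log` method removed, `Deduper` is now wired in as an `slog.Handler`: wrap it in `slog.New`, as the updated test later in this diff does. A minimal usage sketch:

```go
package main

import (
	"log/slog"
	"time"

	"github.com/prometheus/common/promslog"
	"github.com/prometheus/prometheus/util/logging"
)

func main() {
	base := promslog.New(&promslog.Config{}) // underlying logger that actually writes

	// Dedupe returns a *Deduper, which satisfies slog.Handler, so it is
	// used by wrapping it in a new *slog.Logger.
	d := logging.Dedupe(base, 1*time.Minute)
	defer d.Stop()
	logger := slog.New(d)

	for i := 0; i < 10; i++ {
		logger.Info("noisy message") // only the first reaches base within the minute
	}
}
```

Note that the deduplication key is the record's message alone, so records that differ only in their attributes are collapsed together.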
type Deduper struct { - next log.Logger + next *slog.Logger repeat time.Duration quit chan struct{} mtx sync.RWMutex @@ -51,7 +36,7 @@ type Deduper struct { } // Dedupe log lines to next, only repeating every repeat duration. -func Dedupe(next log.Logger, repeat time.Duration) *Deduper { +func Dedupe(next *slog.Logger, repeat time.Duration) *Deduper { d := &Deduper{ next: next, repeat: repeat, @@ -62,6 +47,63 @@ func Dedupe(next log.Logger, repeat time.Duration) *Deduper { return d } +// Enabled returns true if the Deduper's internal slog.Logger is enabled at the +// provided context and log level, and returns false otherwise. It implements +// slog.Handler. +func (d *Deduper) Enabled(ctx context.Context, level slog.Level) bool { + return d.next.Enabled(ctx, level) +} + +// Handle uses the provided context and slog.Record to deduplicate messages +// within the configured repeat duration. Log records received within the interval are not acted on, and +// thus dropped. Log records that pass deduplication and need action invoke the +// Handle() method on the Deduper's internal slog.Logger's handler, effectively +// chaining log calls to the internal slog.Logger. +func (d *Deduper) Handle(ctx context.Context, r slog.Record) error { + line := r.Message + d.mtx.RLock() + last, ok := d.seen[line] + d.mtx.RUnlock() + + if ok && time.Since(last) < d.repeat { + return nil + } + + d.mtx.Lock() + if len(d.seen) < maxEntries { + d.seen[line] = time.Now() + } + d.mtx.Unlock() + + return d.next.Handler().Handle(ctx, r.Clone()) +} + +// WithAttrs returns a new Deduper whose internal slog.Logger carries the +// provided attributes. It implements slog.Handler. +func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { + return &Deduper{ + next: slog.New(d.next.Handler().WithAttrs(attrs)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + +// WithGroup returns a new Deduper whose internal slog.Logger opens the +// provided group name. It implements slog.Handler. +func (d *Deduper) WithGroup(name string) slog.Handler { + if name == "" { + return d + } + + return &Deduper{ + next: slog.New(d.next.Handler().WithGroup(name)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + // Stop the Deduper. func (d *Deduper) Stop() { close(d.quit) @@ -87,44 +129,3 @@ func (d *Deduper) run() { } } } - -// Log implements log.Logger. -func (d *Deduper) Log(keyvals ...interface{}) error { - line, err := encode(keyvals...) - if err != nil { - return err - } - - d.mtx.RLock() - last, ok := d.seen[line] - d.mtx.RUnlock() - - if ok && time.Since(last) < d.repeat { - return nil - } - - d.mtx.Lock() - if len(d.seen) < maxEntries { - d.seen[line] = time.Now() - } - d.mtx.Unlock() - - return d.next.Log(keyvals...)
-} - -func encode(keyvals ...interface{}) (string, error) { - enc := logfmtEncoderPool.Get().(*logfmtEncoder) - enc.buf.Reset() - defer logfmtEncoderPool.Put(enc) - - if err := enc.EncodeKeyvals(keyvals...); err != nil { - return "", err - } - - // Add newline to the end of the buffer - if err := enc.EndRecord(); err != nil { - return "", err - } - - return enc.buf.String(), nil -} diff --git a/util/logging/dedupe_test.go b/util/logging/dedupe_test.go index e05d6454c5..5baa90b038 100644 --- a/util/logging/dedupe_test.go +++ b/util/logging/dedupe_test.go @@ -14,34 +14,45 @@ package logging import ( + "bytes" + "log/slog" + "strings" "testing" "time" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) -type counter int - -func (c *counter) Log(...interface{}) error { - (*c)++ - return nil -} - func TestDedupe(t *testing.T) { - var c counter - d := Dedupe(&c, 100*time.Millisecond) + var buf bytes.Buffer + d := Dedupe(promslog.New(&promslog.Config{Writer: &buf}), 100*time.Millisecond) + dlog := slog.New(d) defer d.Stop() // Log 10 times quickly, ensure they are deduped. for i := 0; i < 10; i++ { - err := d.Log("msg", "hello") - require.NoError(t, err) + dlog.Info("test", "hello", "world") } - require.Equal(t, 1, int(c)) + + // Trim empty lines + lines := []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 1) // Wait, then log again, make sure it is logged. time.Sleep(200 * time.Millisecond) - err := d.Log("msg", "hello") - require.NoError(t, err) - require.Equal(t, 2, int(c)) + dlog.Info("test", "hello", "world") + // Trim empty lines + lines = []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 2) } diff --git a/util/logging/file.go b/util/logging/file.go index 2afa828547..f20927beda 100644 --- a/util/logging/file.go +++ b/util/logging/file.go @@ -15,20 +15,15 @@ package logging import ( "fmt" + "log/slog" "os" - "time" - "github.com/go-kit/log" + "github.com/prometheus/common/promslog" ) -var timestampFormat = log.TimestampFormat( - func() time.Time { return time.Now().UTC() }, - "2006-01-02T15:04:05.000Z07:00", -) - -// JSONFileLogger represents a logger that writes JSON to a file. +// JSONFileLogger represents a logger that writes JSON to a file. It implements the promql.QueryLogger interface. type JSONFileLogger struct { - logger log.Logger + logger *slog.Logger file *os.File } @@ -40,21 +35,48 @@ func NewJSONFileLogger(s string) (*JSONFileLogger, error) { f, err := os.OpenFile(s, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666) if err != nil { - return nil, fmt.Errorf("can't create json logger: %w", err) + return nil, fmt.Errorf("can't create json log file: %w", err) } + jsonFmt := &promslog.AllowedFormat{} + _ = jsonFmt.Set("json") return &JSONFileLogger{ - logger: log.With(log.NewJSONLogger(f), "ts", timestampFormat), + logger: promslog.New(&promslog.Config{Format: jsonFmt, Writer: f}), file: f, }, nil } -// Close closes the underlying file. +// Close closes the underlying file. It implements the promql.QueryLogger interface. func (l *JSONFileLogger) Close() error { return l.file.Close() } -// Log calls the Log function of the underlying logger. -func (l *JSONFileLogger) Log(i ...interface{}) error { - return l.logger.Log(i...) +// With calls the `With()` method on the underlying `log/slog.Logger` with the +// provided msg and args. 
It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) With(args ...any) { + l.logger = l.logger.With(args...) +} + +// Info calls the `Info()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Info(msg string, args ...any) { + l.logger.Info(msg, args...) +} + +// Error calls the `Error()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Error(msg string, args ...any) { + l.logger.Error(msg, args...) +} + +// Debug calls the `Debug()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Debug(msg string, args ...any) { + l.logger.Debug(msg, args...) +} + +// Warn calls the `Warn()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Warn(msg string, args ...any) { + l.logger.Warn(msg, args...) } diff --git a/util/logging/file_test.go b/util/logging/file_test.go index 0e760a4848..8ab4754339 100644 --- a/util/logging/file_test.go +++ b/util/logging/file_test.go @@ -34,12 +34,13 @@ func TestJSONFileLogger_basic(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Info("test", "hello", "world") require.NoError(t, err) r := make([]byte, 1024) _, err = f.Read(r) require.NoError(t, err) - result, err := regexp.Match(`^{"test":"yes","ts":"[^"]+"}\n`, r) + + result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":\{.+\},"msg":"test","hello":"world"}\n`, r) require.NoError(t, err) require.True(t, result, "unexpected content: %s", r) @@ -63,14 +64,14 @@ func TestJSONFileLogger_parallel(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Info("test", "hello", "world") require.NoError(t, err) l2, err := NewJSONFileLogger(f.Name()) require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l2.Log("test", "yes") + l2.Info("test", "hello", "world") require.NoError(t, err) err = l.Close() diff --git a/util/logging/ratelimit.go b/util/logging/ratelimit.go deleted file mode 100644 index 32d1e249e6..0000000000 --- a/util/logging/ratelimit.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2019 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package logging - -import ( - "github.com/go-kit/log" - "golang.org/x/time/rate" -) - -type ratelimiter struct { - limiter *rate.Limiter - next log.Logger -} - -// RateLimit write to a logger. -func RateLimit(next log.Logger, limit rate.Limit) log.Logger { - return &ratelimiter{ - limiter: rate.NewLimiter(limit, int(limit)), - next: next, - } -} - -func (r *ratelimiter) Log(keyvals ...interface{}) error { - if r.limiter.Allow() { - return r.next.Log(keyvals...) 
- } - return nil -} diff --git a/util/notifications/notifications.go b/util/notifications/notifications.go new file mode 100644 index 0000000000..4888a0b664 --- /dev/null +++ b/util/notifications/notifications.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package notifications + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + ConfigurationUnsuccessful = "Configuration reload has failed." + StartingUp = "Prometheus is starting and replaying the write-ahead log (WAL)." + ShuttingDown = "Prometheus is shutting down and gracefully stopping all operations." +) + +// Notification represents an individual notification message. +type Notification struct { + Text string `json:"text"` + Date time.Time `json:"date"` + Active bool `json:"active"` +} + +// Notifications stores a list of Notification objects. +// It also manages live subscribers that receive notifications via channels. +type Notifications struct { + mu sync.Mutex + notifications []Notification + subscribers map[chan Notification]struct{} // Active subscribers. + maxSubscribers int + + subscriberGauge prometheus.Gauge + notificationsSent prometheus.Counter + notificationsDropped prometheus.Counter +} + +// NewNotifications creates a new Notifications instance. +func NewNotifications(maxSubscribers int, reg prometheus.Registerer) *Notifications { + n := &Notifications{ + subscribers: make(map[chan Notification]struct{}), + maxSubscribers: maxSubscribers, + subscriberGauge: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_active_subscribers", + Help: "The current number of active notification subscribers.", + }), + notificationsSent: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_sent_total", + Help: "Total number of notification updates sent.", + }), + notificationsDropped: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_dropped_total", + Help: "Total number of notification updates dropped.", + }), + } + + if reg != nil { + reg.MustRegister(n.subscriberGauge, n.notificationsSent, n.notificationsDropped) + } + + return n +} + +// AddNotification adds a new notification or updates the timestamp if it already exists. +func (n *Notifications) AddNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + for i, notification := range n.notifications { + if notification.Text == text { + n.notifications[i].Date = time.Now() + + n.notifySubscribers(n.notifications[i]) + return + } + } + + newNotification := Notification{ + Text: text, + Date: time.Now(), + Active: true, + } + n.notifications = append(n.notifications, newNotification) + + n.notifySubscribers(newNotification) +} + +// notifySubscribers sends a notification to all active subscribers. 
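The subscription flow defined in the rest of this file pairs `AddNotification`/`DeleteNotification` with `Sub`, which replays the current notifications to a new subscriber and then streams live updates. A usage sketch (the subscriber limit is illustrative):

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/util/notifications"
)

func main() {
	n := notifications.NewNotifications(16, nil) // up to 16 subscribers, no metrics registry

	ch, unsubscribe, ok := n.Sub()
	if !ok {
		panic("subscriber limit reached")
	}
	defer unsubscribe()

	go func() {
		n.AddNotification(notifications.StartingUp)
		// Deletion is broadcast once more with Active: false before removal.
		n.DeleteNotification(notifications.StartingUp)
	}()

	for i := 0; i < 2; i++ {
		notif := <-ch
		fmt.Printf("%s (active=%v)\n", notif.Text, notif.Active)
	}
}
```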
+func (n *Notifications) notifySubscribers(notification Notification) { + for sub := range n.subscribers { + // Non-blocking send to avoid subscriber blocking issues. + n.notificationsSent.Inc() + select { + case sub <- notification: + // Notification sent to the subscriber. + default: + // Drop the notification if the subscriber's channel is full. + n.notificationsDropped.Inc() + } + } +} + +// DeleteNotification removes the first notification that matches the provided text. +// The deleted notification is sent to subscribers with Active: false before being removed. +func (n *Notifications) DeleteNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + // Iterate through the notifications to find the matching text. + for i, notification := range n.notifications { + if notification.Text == text { + // Mark the notification as inactive and notify subscribers. + notification.Active = false + n.notifySubscribers(notification) + + // Remove the notification from the list. + n.notifications = append(n.notifications[:i], n.notifications[i+1:]...) + return + } + } +} + +// Get returns a copy of the list of notifications for safe access outside the struct. +func (n *Notifications) Get() []Notification { + n.mu.Lock() + defer n.mu.Unlock() + + // Return a copy of the notifications slice to avoid modifying the original slice outside. + notificationsCopy := make([]Notification, len(n.notifications)) + copy(notificationsCopy, n.notifications) + return notificationsCopy +} + +// Sub allows a client to subscribe to live notifications. +// It returns a channel where the subscriber will receive notifications and a function to unsubscribe. +// Each subscriber has its own goroutine to handle notifications and prevent blocking. +func (n *Notifications) Sub() (<-chan Notification, func(), bool) { + n.mu.Lock() + defer n.mu.Unlock() + + if len(n.subscribers) >= n.maxSubscribers { + return nil, nil, false + } + + ch := make(chan Notification, 10) // Buffered channel to prevent blocking. + + // Add the new subscriber to the list. + n.subscribers[ch] = struct{}{} + n.subscriberGauge.Set(float64(len(n.subscribers))) + + // Send all current notifications to the new subscriber. + for _, notification := range n.notifications { + ch <- notification + } + + // Unsubscribe function to remove the channel from subscribers. + unsubscribe := func() { + n.mu.Lock() + defer n.mu.Unlock() + + // Close the channel and remove it from the subscribers map. + close(ch) + delete(n.subscribers, ch) + n.subscriberGauge.Set(float64(len(n.subscribers))) + } + + return ch, unsubscribe, true +} diff --git a/util/notifications/notifications_test.go b/util/notifications/notifications_test.go new file mode 100644 index 0000000000..e487e9ce54 --- /dev/null +++ b/util/notifications/notifications_test.go @@ -0,0 +1,223 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package notifications + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestNotificationLifecycle tests adding, modifying, and deleting notifications. +func TestNotificationLifecycle(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Add a notification. + notifs.AddNotification("Test Notification 1") + + // Check if the notification was added. + notifications := notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after addition.") + require.Equal(t, "Test Notification 1", notifications[0].Text, "Notification text mismatch.") + require.True(t, notifications[0].Active, "Expected notification to be active.") + + // Modify the notification. + notifs.AddNotification("Test Notification 1") + notifications = notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after modification.") + + // Delete the notification. + notifs.DeleteNotification("Test Notification 1") + notifications = notifs.Get() + require.Empty(t, notifications, "Expected no notifications after deletion.") +} + +// TestSubscriberReceivesNotifications tests that a subscriber receives notifications, including modifications and deletions. +func TestSubscriberReceivesNotifications(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add notifications. + notifs.AddNotification("Test Notification 1") + notifs.AddNotification("Test Notification 2") + + // Modify a notification. + notifs.AddNotification("Test Notification 1") + + // Delete a notification. + notifs.DeleteNotification("Test Notification 2") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + unsubscribe() + wg.Wait() // Wait for the subscriber goroutine to finish. + + // Verify that we received the expected number of notifications. + require.Len(t, receivedNotifications, 4, "Expected 4 notifications (2 active, 1 modified, 1 deleted).") + + // Check the content and state of received notifications. + expected := []struct { + Text string + Active bool + }{ + {"Test Notification 1", true}, + {"Test Notification 2", true}, + {"Test Notification 1", true}, + {"Test Notification 2", false}, + } + + for i, n := range receivedNotifications { + require.Equal(t, expected[i].Text, n.Text, "Notification text mismatch at index %d.", i) + require.Equal(t, expected[i].Active, n.Active, "Notification active state mismatch at index %d.", i) + } +} + +// TestMultipleSubscribers tests that multiple subscribers receive notifications independently. +func TestMultipleSubscribers(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe two subscribers to notifications. + sub1, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1) + + sub2, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2) + + var wg sync.WaitGroup + wg.Add(2) + + receivedSub1 := make([]Notification, 0) + receivedSub2 := make([]Notification, 0) + + // Goroutine for subscriber 1. + go func() { + defer wg.Done() + for notification := range sub1 { + receivedSub1 = append(receivedSub1, notification) + } + }() + + // Goroutine for subscriber 2. 
+ go func() { + defer wg.Done() + for notification := range sub2 { + receivedSub2 = append(receivedSub2, notification) + } + }() + + // Add and delete notifications. + notifs.AddNotification("Test Notification 1") + notifs.DeleteNotification("Test Notification 1") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + // Unsubscribe both. + unsubscribe1() + unsubscribe2() + + wg.Wait() + + // Both subscribers should have received the same 2 notifications. + require.Len(t, receivedSub1, 2, "Expected 2 notifications for subscriber 1.") + require.Len(t, receivedSub2, 2, "Expected 2 notifications for subscriber 2.") + + // Verify that both subscribers received the same notifications. + for i := 0; i < 2; i++ { + require.Equal(t, receivedSub1[i], receivedSub2[i], "Subscriber notification mismatch at index %d.", i) + } +} + +// TestUnsubscribe tests that unsubscribing prevents further notifications from being received. +func TestUnsubscribe(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add a notification and then unsubscribe. + notifs.AddNotification("Test Notification 1") + time.Sleep(100 * time.Millisecond) // Allow time for notification delivery. + unsubscribe() // Unsubscribe. + + // Add another notification after unsubscribing. + notifs.AddNotification("Test Notification 2") + + // Wait for the subscriber goroutine to finish. + wg.Wait() + + // Only the first notification should have been received. + require.Len(t, receivedNotifications, 1, "Expected 1 notification before unsubscribe.") + require.Equal(t, "Test Notification 1", receivedNotifications[0].Text, "Unexpected notification text.") +} + +// TestMaxSubscribers tests that exceeding the max subscribers limit prevents additional subscriptions. +func TestMaxSubscribers(t *testing.T) { + maxSubscribers := 2 + notifs := NewNotifications(maxSubscribers, nil) + + // Subscribe the maximum number of subscribers. + _, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1, "Expected first subscription to succeed.") + + _, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2, "Expected second subscription to succeed.") + + // Try to subscribe more than the max allowed. + _, _, ok3 := notifs.Sub() + require.False(t, ok3, "Expected third subscription to fail due to max subscriber limit.") + + // Unsubscribe one subscriber and try again. + unsubscribe1() + + _, unsubscribe4, ok4 := notifs.Sub() + require.True(t, ok4, "Expected subscription to succeed after unsubscribing a subscriber.") + + // Clean up the subscriptions. + unsubscribe2() + unsubscribe4() +} diff --git a/util/runtime/limits_default.go b/util/runtime/limits_default.go index 0126adb1a8..156747d450 100644 --- a/util/runtime/limits_default.go +++ b/util/runtime/limits_default.go @@ -23,7 +23,7 @@ import ( // syscall.RLIM_INFINITY is a constant. // Its type is int on most architectures but there are exceptions such as loong64. -// Uniform it to uint accorind to the standard. +// Uniform it to uint according to the standard. 
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_resource.h.html var unlimited uint64 = syscall.RLIM_INFINITY & math.MaxUint64 diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 7d1f9dda24..e15d591e0c 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -30,15 +30,15 @@ import ( // New returns a new TestStorage for testing purposes // that removes all associated files on closing. -func New(t testutil.T) *TestStorage { - stor, err := NewWithError() +func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { + stor, err := NewWithError(outOfOrderTimeWindow...) require.NoError(t, err) return stor } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. -func NewWithError() (*TestStorage, error) { +func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { dir, err := os.MkdirTemp("", "test_storage") if err != nil { return nil, fmt.Errorf("opening test directory: %w", err) @@ -51,6 +51,14 @@ func NewWithError() (*TestStorage, error) { opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 opts.EnableNativeHistograms = true + + // Set OutOfOrderTimeWindow if provided, otherwise use default (0) + if len(outOfOrderTimeWindow) > 0 { + opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] + } else { + opts.OutOfOrderTimeWindow = 0 // Default value is zero + } + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) diff --git a/util/testutil/port.go b/util/testutil/port.go index 1e449b123d..7cf4cf1ccc 100644 --- a/util/testutil/port.go +++ b/util/testutil/port.go @@ -15,21 +15,56 @@ package testutil import ( "net" + "sync" "testing" ) +var ( + mu sync.Mutex + usedPorts []int +) + // RandomUnprivilegedPort returns valid unprivileged random port number which can be used for testing. func RandomUnprivilegedPort(t *testing.T) int { t.Helper() + mu.Lock() + defer mu.Unlock() + port, err := getPort() + if err != nil { + t.Fatal(err) + } + + for portWasUsed(port) { + port, err = getPort() + if err != nil { + t.Fatal(err) + } + } + + usedPorts = append(usedPorts, port) + + return port +} + +func portWasUsed(port int) bool { + for _, usedPort := range usedPorts { + if port == usedPort { + return true + } + } + return false +} + +func getPort() (int, error) { listener, err := net.Listen("tcp", ":0") if err != nil { - t.Fatalf("Listening on random port: %v", err) + return 0, err } if err := listener.Close(); err != nil { - t.Fatalf("Closing listener: %v", err) + return 0, err } - return listener.Addr().(*net.TCPAddr).Port + return listener.Addr().(*net.TCPAddr).Port, nil } diff --git a/util/treecache/treecache.go b/util/treecache/treecache.go index bbbaaf3d6e..4d4b6f544c 100644 --- a/util/treecache/treecache.go +++ b/util/treecache/treecache.go @@ -17,12 +17,11 @@ import ( "bytes" "errors" "fmt" + "log/slog" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" ) @@ -47,19 +46,19 @@ func init() { prometheus.MustRegister(numWatchers) } -// ZookeeperLogger wraps a log.Logger into a zk.Logger. +// ZookeeperLogger wraps a *slog.Logger into a zk.Logger. type ZookeeperLogger struct { - logger log.Logger + logger *slog.Logger } // NewZookeeperLogger is a constructor for ZookeeperLogger. 
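One detail of the `Printf` port just below is worth flagging: `slog.Logger.Info` interprets its variadic arguments as alternating key/value attributes, while `zk.Logger`'s `Printf` contract is printf-style formatting. A formatting-preserving variant would look like the following sketch (a suggested alternative, not what this patch does):

```go
package treecache

import (
	"fmt"
	"log/slog"
)

// ZookeeperLogger wraps a *slog.Logger into a zk.Logger (as in this diff).
type ZookeeperLogger struct {
	logger *slog.Logger
}

// Printf implements zk.Logger. Formatting first keeps printf semantics;
// passing i straight to Info would treat the format args as (possibly
// malformed) key/value attrs instead.
func (zl ZookeeperLogger) Printf(s string, i ...interface{}) {
	zl.logger.Info(fmt.Sprintf(s, i...))
}
```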
-func NewZookeeperLogger(logger log.Logger) ZookeeperLogger {
+func NewZookeeperLogger(logger *slog.Logger) ZookeeperLogger {
 	return ZookeeperLogger{logger: logger}
 }
 
 // Printf implements zk.Logger.
 func (zl ZookeeperLogger) Printf(s string, i ...interface{}) {
-	level.Info(zl.logger).Log("msg", fmt.Sprintf(s, i...))
+	zl.logger.Info(fmt.Sprintf(s, i...))
 }
 
 // A ZookeeperTreeCache keeps data from all children of a Zookeeper path
@@ -72,7 +71,7 @@ type ZookeeperTreeCache struct {
 	wg   *sync.WaitGroup
 
 	head *zookeeperTreeCacheNode
 
-	logger log.Logger
+	logger *slog.Logger
 }
 
 // A ZookeeperTreeCacheEvent models a Zookeeper event for a path.
@@ -90,7 +89,7 @@ type zookeeperTreeCacheNode struct {
 }
 
 // NewZookeeperTreeCache creates a new ZookeeperTreeCache for a given path.
-func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger log.Logger) *ZookeeperTreeCache {
+func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger *slog.Logger) *ZookeeperTreeCache {
 	tc := &ZookeeperTreeCache{
 		conn:   conn,
 		prefix: path,
@@ -144,20 +143,20 @@ func (tc *ZookeeperTreeCache) loop(path string) {
 
 	err := tc.recursiveNodeUpdate(path, tc.head)
 	if err != nil {
-		level.Error(tc.logger).Log("msg", "Error during initial read of Zookeeper", "err", err)
+		tc.logger.Error("Error during initial read of Zookeeper", "err", err)
 		failure()
 	}
 
 	for {
 		select {
 		case ev := <-tc.head.events:
-			level.Debug(tc.logger).Log("msg", "Received Zookeeper event", "event", ev)
+			tc.logger.Debug("Received Zookeeper event", "event", ev)
 			if failureMode {
 				continue
 			}
 
 			if ev.Type == zk.EventNotWatching {
-				level.Info(tc.logger).Log("msg", "Lost connection to Zookeeper.")
+				tc.logger.Info("Lost connection to Zookeeper.")
 				failure()
 			} else {
 				path := strings.TrimPrefix(ev.Path, tc.prefix)
@@ -178,15 +177,15 @@ func (tc *ZookeeperTreeCache) loop(path string) {
 
 				switch err := tc.recursiveNodeUpdate(ev.Path, node); {
 				case err != nil:
-					level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", err)
+					tc.logger.Error("Error during processing of Zookeeper event", "err", err)
 					failure()
 				case tc.head.data == nil:
-					level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix)
+					tc.logger.Error("Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix)
 					failure()
 				}
 			}
 		case <-retryChan:
-			level.Info(tc.logger).Log("msg", "Attempting to resync state with Zookeeper")
+			tc.logger.Info("Attempting to resync state with Zookeeper")
 			previousState := &zookeeperTreeCacheNode{
 				children: tc.head.children,
 			}
@@ -194,13 +193,13 @@ func (tc *ZookeeperTreeCache) loop(path string) {
 			tc.head.children = make(map[string]*zookeeperTreeCacheNode)
 
 			if err := tc.recursiveNodeUpdate(tc.prefix, tc.head); err != nil {
-				level.Error(tc.logger).Log("msg", "Error during Zookeeper resync", "err", err)
+				tc.logger.Error("Error during Zookeeper resync", "err", err)
 
 				// Revert to our previous state.
tc.head.children = previousState.children failure() } else { tc.resyncState(tc.prefix, tc.head, previousState) - level.Info(tc.logger).Log("msg", "Zookeeper resync successful") + tc.logger.Info("Zookeeper resync successful") failureMode = false } case <-tc.stop: diff --git a/util/zeropool/pool_test.go b/util/zeropool/pool_test.go index fea8200226..e9793f64d7 100644 --- a/util/zeropool/pool_test.go +++ b/util/zeropool/pool_test.go @@ -81,7 +81,7 @@ func TestPool(t *testing.T) { t.Run("does not allocate", func(t *testing.T) { pool := zeropool.New(func() []byte { return make([]byte, 1024) }) - // Warm up, this will alloate one slice. + // Warm up, this will allocate one slice. slice := pool.Get() pool.Put(slice) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 0ec8467faa..b37605f5d5 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,8 +15,12 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" "errors" "fmt" + "log/slog" "math" "math/rand" "net" @@ -30,8 +34,6 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" jsoniter "github.com/json-iterator/go" "github.com/munnerz/goautoneg" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/httputil" + "github.com/prometheus/prometheus/util/notifications" "github.com/prometheus/prometheus/util/stats" ) @@ -202,16 +205,18 @@ type API struct { ready func(http.HandlerFunc) http.HandlerFunc globalURLOptions GlobalURLOptions - db TSDBAdminStats - dbDir string - enableAdmin bool - logger log.Logger - CORSOrigin *regexp.Regexp - buildInfo *PrometheusVersion - runtimeInfo func() (RuntimeInfo, error) - gatherer prometheus.Gatherer - isAgent bool - statsRenderer StatsRenderer + db TSDBAdminStats + dbDir string + enableAdmin bool + logger *slog.Logger + CORSOrigin *regexp.Regexp + buildInfo *PrometheusVersion + runtimeInfo func() (RuntimeInfo, error) + gatherer prometheus.Gatherer + isAgent bool + statsRenderer StatsRenderer + notificationsGetter func() []notifications.Notification + notificationsSub func() (<-chan notifications.Notification, func(), bool) remoteWriteHandler http.Handler remoteReadHandler http.Handler @@ -236,7 +241,7 @@ func NewAPI( db TSDBAdminStats, dbDir string, enableAdmin bool, - logger log.Logger, + logger *slog.Logger, rr func(context.Context) RulesRetriever, remoteReadSampleLimit int, remoteReadConcurrencyLimit int, @@ -245,6 +250,8 @@ func NewAPI( corsOrigin *regexp.Regexp, runtimeInfo func() (RuntimeInfo, error), buildInfo *PrometheusVersion, + notificationsGetter func() []notifications.Notification, + notificationsSub func() (<-chan notifications.Notification, func(), bool), gatherer prometheus.Gatherer, registerer prometheus.Registerer, statsRenderer StatsRenderer, @@ -261,22 +268,24 @@ func NewAPI( targetRetriever: tr, alertmanagerRetriever: ar, - now: time.Now, - config: configFunc, - flagsMap: flagsMap, - ready: readyFunc, - globalURLOptions: globalURLOptions, - db: db, - dbDir: dbDir, - enableAdmin: enableAdmin, - rulesRetriever: rr, - logger: logger, - CORSOrigin: corsOrigin, - runtimeInfo: runtimeInfo, - buildInfo: buildInfo, - gatherer: gatherer, - isAgent: isAgent, - statsRenderer: DefaultStatsRenderer, + now: time.Now, + config: configFunc, + flagsMap: flagsMap, + ready: readyFunc, + globalURLOptions: globalURLOptions, + db: db, + dbDir: dbDir, + enableAdmin: enableAdmin, + rulesRetriever: rr, + logger: 
logger,
+		CORSOrigin:           corsOrigin,
+		runtimeInfo:          runtimeInfo,
+		buildInfo:            buildInfo,
+		gatherer:             gatherer,
+		isAgent:              isAgent,
+		statsRenderer:        DefaultStatsRenderer,
+		notificationsGetter:  notificationsGetter,
+		notificationsSub:     notificationsSub,
 
 		remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame),
 	}
@@ -390,6 +399,8 @@ func (api *API) Register(r *route.Router) {
 	r.Get("/status/flags", wrap(api.serveFlags))
 	r.Get("/status/tsdb", wrapAgent(api.serveTSDBStatus))
 	r.Get("/status/walreplay", api.serveWALReplayStatus)
+	r.Get("/notifications", api.notifications)
+	r.Get("/notifications/live", api.notificationsSSE)
 	r.Post("/read", api.ready(api.remoteRead))
 	r.Post("/write", api.ready(api.remoteWrite))
 	r.Post("/otlp/v1/metrics", api.ready(api.otlpWrite))
@@ -824,12 +835,22 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) {
 }
 
 var (
-	// MinTime is the default timestamp used for the begin of optional time ranges.
-	// Exposed to let downstream projects to reference it.
+	// MinTime is the default timestamp used for the start of optional time ranges.
+	// Exposed to let downstream projects reference it.
+	//
+	// Historical note: This should just be time.Unix(math.MinInt64/1000, 0).UTC(),
+	// but it was set to a higher value in the past due to a misunderstanding.
+	// The value is still low enough for practical purposes, so we don't want
+	// to change it now, avoiding confusion for importers of this variable.
 	MinTime = time.Unix(math.MinInt64/1000+62135596801, 0).UTC()
 
 	// MaxTime is the default timestamp used for the end of optional time ranges.
 	// Exposed to let downstream projects to reference it.
+	//
+	// Historical note: This should just be time.Unix(math.MaxInt64/1000, 0).UTC(),
+	// but it was set to a lower value in the past due to a misunderstanding.
+	// The value is still high enough for practical purposes, so we don't want
+	// to change it now, avoiding confusion for importers of this variable.
 	MaxTime = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC()
 
 	minTimeFormatted = MinTime.Format(time.RFC3339Nano)
@@ -1352,7 +1373,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult {
 
 // RuleDiscovery has info for all rules.
 type RuleDiscovery struct {
-	RuleGroups []*RuleGroup `json:"groups"`
+	RuleGroups     []*RuleGroup `json:"groups"`
+	GroupNextToken string       `json:"groupNextToken,omitempty"`
 }
 
 // RuleGroup has info for rules which are part of a group.
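The `GroupNextToken` field above, together with the handler changes in the next hunk, adds pagination to `/api/v1/rules`. As a rough client-side illustration (not part of this patch), a consumer can loop on the new `group_limit` and `group_next_token` parameters until a response no longer carries a token; the server address and page size here are assumptions.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// rulesPage mirrors only the fields of the /api/v1/rules response body
// that the pagination loop needs.
type rulesPage struct {
	Data struct {
		Groups []struct {
			Name string `json:"name"`
			File string `json:"file"`
		} `json:"groups"`
		GroupNextToken string `json:"groupNextToken,omitempty"`
	} `json:"data"`
}

func main() {
	const base = "http://localhost:9090/api/v1/rules" // assumed server address
	token := ""
	for {
		q := url.Values{"group_limit": []string{"10"}} // assumed page size
		if token != "" {
			q.Set("group_next_token", token)
		}
		resp, err := http.Get(base + "?" + q.Encode())
		if err != nil {
			panic(err)
		}
		var page rulesPage
		err = json.NewDecoder(resp.Body).Decode(&page)
		resp.Body.Close()
		if err != nil {
			panic(err)
		}
		for _, g := range page.Data.Groups {
			fmt.Println(g.File, g.Name)
		}
		token = page.Data.GroupNextToken
		if token == "" {
			break // Last page: the server sets no groupNextToken.
		}
	}
}
```

The token is the hex-encoded SHA-1 of `file;group` for the first group of the following page, so it names a position in the group list rather than a numeric offset; if the rule groups change between requests, the stale token is rejected as an invalid parameter (see the validation in the next hunk).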
@@ -1439,8 +1461,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1485,6 +1522,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1500,6 +1538,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1526,9 +1565,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1547,6 +1597,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } @@ -1668,6 +1756,57 @@ func (api *API) serveWALReplayStatus(w http.ResponseWriter, r *http.Request) { }, nil, "") } +func (api *API) notifications(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + api.respond(w, r, api.notificationsGetter(), nil, "") +} + +func (api *API) notificationsSSE(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + 
w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + // Subscribe to notifications. + notifications, unsubscribe, ok := api.notificationsSub() + if !ok { + w.WriteHeader(http.StatusNoContent) + return + } + defer unsubscribe() + + // Set up a flusher to push the response to the client. + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "Streaming unsupported", http.StatusInternalServerError) + return + } + + // Flush the response to ensure the headers are immediately and eventSource + // onopen is triggered client-side. + flusher.Flush() + + for { + select { + case notification := <-notifications: + // Marshal the notification to JSON. + jsonData, err := json.Marshal(notification) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + continue + } + + // Write the event data in SSE format with JSON content. + fmt.Fprintf(w, "data: %s\n\n", jsonData) + + // Flush the response to ensure the data is sent immediately. + flusher.Flush() + case <-r.Context().Done(): + return + } + } +} + func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { // This is only really for tests - this will never be nil IRL. if api.remoteReadHandler != nil { @@ -1689,7 +1828,7 @@ func (api *API) otlpWrite(w http.ResponseWriter, r *http.Request) { if api.otlpWriteHandler != nil { api.otlpWriteHandler.ServeHTTP(w, r) } else { - http.Error(w, "otlp write receiver needs to be enabled with --enable-feature=otlp-write-receiver", http.StatusNotFound) + http.Error(w, "otlp write receiver needs to be enabled with --web.enable-otlp-receiver", http.StatusNotFound) } } @@ -1792,7 +1931,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface b, err := codec.Encode(resp) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling response", "url", req.URL, "err", err) + api.logger.Error("error marshaling response", "url", req.URL, "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1800,7 +1939,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface w.Header().Set("Content-Type", codec.ContentType().String()) w.WriteHeader(http.StatusOK) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "url", req.URL, "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "url", req.URL, "bytesWritten", n, "err", err) } } @@ -1830,7 +1969,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter Data: data, }) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling json response", "err", err) + api.logger.Error("error marshaling json response", "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1858,7 +1997,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "bytesWritten", n, "err", err) } } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index ef9d53dd9d..35ad4a9ad3 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -34,12 +34,11 @@ import ( "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/testutil" - 
"github.com/go-kit/log" jsoniter "github.com/json-iterator/go" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -238,7 +237,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule2 := rules.NewAlertingRule( "test_metric4", @@ -250,7 +249,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule3 := rules.NewAlertingRule( "test_metric5", @@ -262,7 +261,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.FromStrings("name", "tm5"), "", false, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule4 := rules.NewAlertingRule( "test_metric6", @@ -274,7 +273,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule5 := rules.NewAlertingRule( "test_metric7", @@ -286,7 +285,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) var r []*rules.AlertingRule r = append(r, rule1) @@ -314,7 +313,7 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { QueryFunc: rules.EngineQueryFunc(engine, storage), Appendable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, } @@ -339,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -471,20 +478,20 @@ func TestEndpoints(t *testing.T) { u, err := url.Parse(server.URL) require.NoError(t, err) - al := promlog.AllowedLevel{} + al := promslog.AllowedLevel{} require.NoError(t, al.Set("debug")) - af := promlog.AllowedFormat{} + af := promslog.AllowedFormat{} require.NoError(t, af.Set("logfmt")) - promlogConfig := promlog.Config{ + promslogConfig := promslog.Config{ Level: &al, Format: &af, } dbDir := t.TempDir() - remote := remote.NewStorage(promlog.New(&promlogConfig), prometheus.DefaultRegisterer, func() (int64, error) { + remote := remote.NewStorage(promslog.New(&promslogConfig), prometheus.DefaultRegisterer, func() (int64, error) { return 0, nil }, dbDir, 1*time.Second, nil, false) @@ -2242,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2330,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + 
State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2411,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2682,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + 
"group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, { endpoint: api.queryExemplars, query: url.Values{ @@ -3530,7 +3747,7 @@ func TestAdminEndpoints(t *testing.T) { func TestRespondSuccess(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -3622,7 +3839,7 @@ func TestRespondSuccess(t *testing.T) { func TestRespondSuccess_DefaultCodecCannotEncodeResponse(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -4034,13 +4251,13 @@ func TestGetGlobalURL(t *testing.T) { false, }, { - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), GlobalURLOptions{ ListenAddress: "127.0.0.1:9090", Host: "prometheus.io", Scheme: "https", }, - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), false, }, { @@ -4176,7 +4393,7 @@ func TestExtractQueryOpts(t *testing.T) { if test.err == nil { require.NoError(t, err) } else { - require.Equal(t, test.err.Error(), err.Error()) + require.EqualError(t, err, test.err.Error()) } }) } diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index 7e1fc09d8a..f5e75615ec 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -23,9 +23,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -105,7 +105,7 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route t.Helper() engine := promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Reg: nil, ActiveQueryTracker: nil, MaxSamples: 100, @@ -127,13 +127,15 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route nil, // Only needed for admin APIs. "", // This is for snapshots, which is disabled when admin APIs are disabled. Hence empty. false, // Disable admin APIs. - log.NewNopLogger(), + promslog.NewNopLogger(), func(context.Context) RulesRetriever { return &DummyRulesRetriever{} }, 0, 0, 0, // Remote read samples and concurrency limit. false, // Not an agent. 
regexp.MustCompile(".*"),
 		func() (RuntimeInfo, error) { return RuntimeInfo{}, errors.New("not implemented") },
 		&PrometheusVersion{},
+		nil,
+		nil,
 		prometheus.DefaultGatherer,
 		nil,
 		nil,
diff --git a/web/federate.go b/web/federate.go
index 8176eba365..8e20a60f0f 100644
--- a/web/federate.go
+++ b/web/federate.go
@@ -21,7 +21,6 @@ import (
 	"sort"
 	"strings"
 
-	"github.com/go-kit/log/level"
 	"github.com/gogo/protobuf/proto"
 	"github.com/prometheus/client_golang/prometheus"
 	dto "github.com/prometheus/client_model/go"
@@ -157,7 +156,7 @@ Loop:
 		})
 	}
 	if ws := set.Warnings(); len(ws) > 0 {
-		level.Debug(h.logger).Log("msg", "Federation select returned warnings", "warnings", ws)
+		h.logger.Debug("Federation select returned warnings", "warnings", ws)
 		federationWarnings.Add(float64(len(ws)))
 	}
 	if set.Err() != nil {
@@ -253,11 +252,11 @@ Loop:
 		})
 		if err != nil {
 			federationErrors.Inc()
-			level.Error(h.logger).Log("msg", "federation failed", "err", err)
+			h.logger.Error("federation failed", "err", err)
 			return
 		}
 		if !nameSeen {
-			level.Warn(h.logger).Log("msg", "Ignoring nameless metric during federation", "metric", s.Metric)
+			h.logger.Warn("Ignoring nameless metric during federation", "metric", s.Metric)
 			continue
 		}
 		// Attach global labels if they do not exist yet.
@@ -314,7 +313,7 @@ Loop:
 	if protMetricFam != nil {
 		if err := enc.Encode(protMetricFam); err != nil {
 			federationErrors.Inc()
-			level.Error(h.logger).Log("msg", "federation failed", "err", err)
+			h.logger.Error("federation failed", "err", err)
 		}
 	}
 }
diff --git a/web/ui/README.md b/web/ui/README.md
index 38087755e5..49ec27d8b4 100644
--- a/web/ui/README.md
+++ b/web/ui/README.md
@@ -12,6 +12,27 @@ in `.promu.yml`, and then `make build` (or build Prometheus using
 
 This will serve all files from your local filesystem. This is for development purposes only.
 
+### Using Prebuilt UI Assets
+
+If you are only working on the Go backend, you can use the prebuilt web UI
+assets shipped with each Prometheus release
+(`prometheus-web-ui-<version>.tar.gz`) for faster builds. This allows you to
+skip building the UI from source.
+
+1. Download and extract the prebuilt UI tarball:
+   ```bash
+   tar -xvf prometheus-web-ui-<version>.tar.gz -C web/ui
+   ```
+
+2. Build Prometheus using the prebuilt assets by passing the following parameter
+   to `make`:
+   ```bash
+   make PREBUILT_ASSETS_STATIC_DIR=web/ui/static build
+   ```
+
+This will include the prebuilt UI files directly in the Prometheus binary,
+avoiding the need to install npm or rebuild the frontend from source.
+
 ## React-app
 
 ### Introduction
diff --git a/web/ui/mantine-ui/README.md b/web/ui/mantine-ui/README.md
deleted file mode 100644
index 0d6babeddb..0000000000
--- a/web/ui/mantine-ui/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# React + TypeScript + Vite
-
-This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
-
-Currently, two official plugins are available:
-
-- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
-- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
-
-## Expanding the ESLint configuration
-
-If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
-
-- Configure the top-level `parserOptions` property like this:
-
-```js
-export default {
-  // other rules...
- parserOptions: { - ecmaVersion: 'latest', - sourceType: 'module', - project: ['./tsconfig.json', './tsconfig.node.json'], - tsconfigRootDir: __dirname, - }, -} -``` - -- Replace `plugin:@typescript-eslint/recommended` to `plugin:@typescript-eslint/recommended-type-checked` or `plugin:@typescript-eslint/strict-type-checked` -- Optionally add `plugin:@typescript-eslint/stylistic-type-checked` -- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and add `plugin:react/recommended` & `plugin:react/jsx-runtime` to the `extends` list diff --git a/web/ui/mantine-ui/index.html b/web/ui/mantine-ui/index.html index deb5f7f56d..d2723488ac 100644 --- a/web/ui/mantine-ui/index.html +++ b/web/ui/mantine-ui/index.html @@ -7,19 +7,21 @@