diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index f4d17b3596..bb4e2d24c9 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,4 +1,4 @@
-blank_issues_enabled: false
+blank_issues_enabled: true
contact_links:
- name: Prometheus Community Support
url: https://prometheus.io/community/
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 89b2f4d0b6..191e07ffac 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,9 +1,22 @@
version: 2
updates:
- - package-ecosystem: "gomod"
+ - package-ecosystem: "docker"
directory: "/"
schedule:
interval: "monthly"
+ - package-ecosystem: "github-actions"
+ directories:
+ - "/"
+ - "/scripts"
+ schedule:
+ interval: "monthly"
+ - package-ecosystem: "gomod"
+ directories:
+ - "/"
+ - "/documentation/examples/remote_storage"
+ - "/internal/tools"
+ schedule:
+ interval: "monthly"
groups:
k8s.io:
patterns:
@@ -12,24 +25,3 @@ updates:
patterns:
- "go.opentelemetry.io/*"
open-pull-requests-limit: 20
- - package-ecosystem: "gomod"
- directory: "/documentation/examples/remote_storage"
- schedule:
- interval: "monthly"
- - package-ecosystem: "npm"
- directory: "/web/ui"
- schedule:
- interval: "monthly"
- open-pull-requests-limit: 20
- - package-ecosystem: "github-actions"
- directory: "/"
- schedule:
- interval: "monthly"
- - package-ecosystem: "github-actions"
- directory: "/scripts"
- schedule:
- interval: "monthly"
- - package-ecosystem: "docker"
- directory: "/"
- schedule:
- interval: "monthly"
diff --git a/.github/stale.yml b/.github/stale.yml
deleted file mode 100644
index 66a72af533..0000000000
--- a/.github/stale.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-# Configuration for probot-stale - https://github.com/probot/stale
-
-# Number of days of inactivity before an Issue or Pull Request becomes stale
-daysUntilStale: 60
-
-# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
-# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
-daysUntilClose: false
-
-# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
-onlyLabels: []
-
-# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
-exemptLabels:
- - keepalive
-
-# Set to true to ignore issues in a project (defaults to false)
-exemptProjects: false
-
-# Set to true to ignore issues in a milestone (defaults to false)
-exemptMilestones: false
-
-# Set to true to ignore issues with an assignee (defaults to false)
-exemptAssignees: false
-
-# Label to use when marking as stale
-staleLabel: stale
-
-# Comment to post when marking as stale. Set to `false` to disable
-markComment: false
-
-# Comment to post when removing the stale label.
-# unmarkComment: >
-# Your comment here.
-
-# Comment to post when closing a stale Issue or Pull Request.
-# closeComment: >
-# Your comment here.
-
-# Limit the number of actions per hour, from 1-30. Default is 30
-limitPerRun: 30
-
-# Limit to only `issues` or `pulls`
-only: pulls
-
-# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
-# pulls:
-# daysUntilStale: 30
-# markComment: >
-# This pull request has been automatically marked as stale because it has not had
-# recent activity. It will be closed if no further activity occurs. Thank you
-# for your contributions.
-
-# issues:
-# exemptLabels:
-# - confirmed
diff --git a/.github/workflows/automerge-dependabot.yml b/.github/workflows/automerge-dependabot.yml
new file mode 100644
index 0000000000..3909f57329
--- /dev/null
+++ b/.github/workflows/automerge-dependabot.yml
@@ -0,0 +1,30 @@
+---
+name: Dependabot auto-merge
+on: pull_request
+
+concurrency:
+ group: ${{ github.workflow }}-${{ (github.event.pull_request && github.event.pull_request.number) || github.ref || github.run_id }}
+ cancel-in-progress: true
+
+permissions:
+ contents: read
+
+jobs:
+ dependabot:
+ permissions:
+ contents: write
+ pull-requests: write
+ runs-on: ubuntu-latest
+ if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' && github.repository_owner == 'prometheus' }}
+ steps:
+ - name: Dependabot metadata
+ id: metadata
+ uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # v2.3.0
+ with:
+ github-token: "${{ secrets.GITHUB_TOKEN }}"
+ - name: Enable auto-merge for Dependabot PRs
+ if: ${{steps.metadata.outputs.update-type == 'version-update:semver-minor' || steps.metadata.outputs.update-type == 'version-update:semver-patch'}}
+ run: gh pr merge --auto --merge "$PR_URL"
+ env:
+ PR_URL: ${{github.event.pull_request.html_url}}
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml
index cbfeb2ba5b..4e942f1f3b 100644
--- a/.github/workflows/buf-lint.yml
+++ b/.github/workflows/buf-lint.yml
@@ -12,8 +12,10 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: bufbuild/buf-setup-action@35c243d7f2a909b1d4e40399b348a7fdab27d78d # v1.34.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1
diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml
index 8b964ef24c..add72cc89c 100644
--- a/.github/workflows/buf.yml
+++ b/.github/workflows/buf.yml
@@ -12,8 +12,10 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: bufbuild/buf-setup-action@35c243d7f2a909b1d4e40399b348a7fdab27d78d # v1.34.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8b3624383c..ea10fd0091 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,6 +4,9 @@ on:
pull_request:
push:
+permissions:
+ contents: read
+
jobs:
test_go:
name: Go tests
@@ -11,13 +14,17 @@ jobs:
container:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
- image: quay.io/prometheus/golang-builder:1.22-base
+ image: quay.io/prometheus/golang-builder:1.24-base
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/setup_environment
- - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1
- - run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false
+ with:
+ enable_npm: true
+ - run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1
+ - run: go test ./tsdb/ -test.tsdb-isolation=false
- run: make -C documentation/examples/remote_storage
- run: make -C documentation/examples
@@ -25,13 +32,17 @@ jobs:
name: More Go tests
runs-on: ubuntu-latest
container:
- image: quay.io/prometheus/golang-builder:1.22-base
+ image: quay.io/prometheus/golang-builder:1.24-base
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/setup_environment
- run: go test --tags=dedupelabels ./...
- - run: GOARCH=386 go test ./cmd/prometheus
+ - run: go test --tags=slicelabels -race ./cmd/prometheus
+ - run: go test --tags=forcedirectio -race ./tsdb/
+ - run: GOARCH=386 go test ./...
- uses: ./.github/promci/actions/check_proto
with:
version: "3.15.8"
@@ -39,11 +50,16 @@ jobs:
test_go_oldest:
name: Go tests with previous Go version
runs-on: ubuntu-latest
+ env:
+ # Enforce the Go version.
+ GOTOOLCHAIN: local
container:
# The go version in this image should be N-1 wrt test_go.
- image: quay.io/prometheus/golang-builder:1.21-base
+ image: quay.io/prometheus/golang-builder:1.23-base
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- run: make build
# Don't run NPM build; don't run race-detector.
- run: make test GO_ONLY=1 test-flags=""
@@ -54,11 +70,13 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
- image: quay.io/prometheus/golang-builder:1.22-base
+ image: quay.io/prometheus/golang-builder:1.24-base
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@@ -74,12 +92,14 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
- go-version: 1.22.x
+ persist-credentials: false
+ - uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
+ with:
+ go-version: 1.24.x
- run: |
- $TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
+ $TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
shell: powershell
@@ -89,9 +109,11 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
- image: quay.io/prometheus/golang-builder:1.22-base
+ image: quay.io/prometheus/golang-builder:1.24-base
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- run: go install ./cmd/promtool/.
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
- run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@@ -107,6 +129,8 @@ jobs:
if: |
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
&&
+ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+ &&
!(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
&&
!(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@@ -114,8 +138,10 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@@ -127,6 +153,8 @@ jobs:
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+ ||
(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
||
(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@@ -137,23 +165,50 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/build
with:
parallelism: 12
thread: ${{ matrix.thread }}
+ build_all_status:
+ # This status check aggregates the individual matrix jobs of the "Build
+ # Prometheus for all architectures" step into a final status. Fails if a
+ # single matrix job fails, succeeds if all matrix jobs succeed.
+ # See https://github.com/orgs/community/discussions/4324 for why this is
+ # needed
+ name: Report status of build Prometheus for all architectures
+ runs-on: ubuntu-latest
+ needs: [build_all]
+ # The run condition needs to include always(). Otherwise actions
+      # behave unexpectedly:
+ # only "needs" will make the Status Report be skipped if one of the builds fails https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-jobs-in-a-workflow#defining-prerequisite-jobs
+ # And skipped is treated as success https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborat[…]n-repositories-with-code-quality-features/about-status-checks
+ # Adding always ensures that the status check is run independently of the
+ # results of Build All
+ if: always() && github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')
+ steps:
+ - name: Successful build
+ if: ${{ !(contains(needs.*.result, 'failure')) && !(contains(needs.*.result, 'cancelled')) }}
+ run: exit 0
+ - name: Failing or cancelled build
+ if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
+ run: exit 1
check_generated_parser:
name: Check generated parser
runs-on: ubuntu-latest
steps:
- name: Checkout repository
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- name: Install Go
- uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
+ uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
with:
cache: false
- go-version: 1.22.x
+ go-version: 1.24.x
- name: Run goyacc and check for diff
run: make install-goyacc check-generated-parser
golangci:
@@ -161,25 +216,30 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - name: Install Go
- uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
- go-version: 1.22.x
+ persist-credentials: false
+ - name: Install Go
+ uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0
+ with:
+ go-version: 1.24.x
- name: Install snmp_exporter/generator dependencies
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
if: github.repository == 'prometheus/snmp_exporter'
- name: Lint
- uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1
+ uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7.0.0
with:
args: --verbose
# Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
- version: v1.59.1
+ version: v2.1.5
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
codeql:
uses: ./.github/workflows/codeql-analysis.yml
+ permissions:
+ contents: read
+ security-events: write
publish_main:
name: Publish main branch artifacts
@@ -187,8 +247,10 @@ jobs:
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@@ -199,10 +261,15 @@ jobs:
name: Publish release artefacts
runs-on: ubuntu-latest
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
+ if: |
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+ ||
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@@ -216,31 +283,44 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
+ - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- name: Install nodejs
- uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
+ uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"
- - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+ - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Check libraries version
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
- run: ./scripts/ui_release.sh --check-package "$(echo ${{ github.ref_name }}|sed s/v2/v0/)"
+ if: |
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+ ||
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+ run: ./scripts/ui_release.sh --check-package "$(./scripts/get_module_version.sh ${GH_REF_NAME})"
+ env:
+ GH_REF_NAME: ${{ github.ref_name }}
- name: build
run: make assets
- name: Copy files before publishing libs
run: ./scripts/ui_release.sh --copy
- name: Publish dry-run libraries
- if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))"
+ if: |
+ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+ &&
+ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish dry-run
- name: Publish libraries
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
+ if: |
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+ ||
+ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish
env:
# The setup-node action writes an .npmrc file with this env variable
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 12ffc659c2..b444815d3c 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -24,15 +24,17 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- name: Initialize CodeQL
- uses: github/codeql-action/init@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11
+ uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
with:
languages: ${{ matrix.language }}
- name: Autobuild
- uses: github/codeql-action/autobuild@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11
+ uses: github/codeql-action/autobuild@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11
+ uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml
index 8ddbc34aeb..7de8bb8da7 100644
--- a/.github/workflows/container_description.yml
+++ b/.github/workflows/container_description.yml
@@ -18,7 +18,9 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- name: Set docker hub repo name
run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
- name: Push README to Dockerhub
@@ -40,7 +42,9 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- name: Set quay.io org name
run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV
- name: Set quay.io repo name
diff --git a/.github/workflows/funcbench.yml b/.github/workflows/funcbench.yml
deleted file mode 100644
index 8959f82142..0000000000
--- a/.github/workflows/funcbench.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-on:
- repository_dispatch:
- types: [funcbench_start]
-name: Funcbench Workflow
-permissions:
- contents: read
-
-jobs:
- run_funcbench:
- name: Running funcbench
- if: github.event.action == 'funcbench_start'
- runs-on: ubuntu-latest
- env:
- AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }}
- BRANCH: ${{ github.event.client_payload.BRANCH }}
- BENCH_FUNC_REGEX: ${{ github.event.client_payload.BENCH_FUNC_REGEX }}
- PACKAGE_PATH: ${{ github.event.client_payload.PACKAGE_PATH }}
- GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}
- GITHUB_ORG: prometheus
- GITHUB_REPO: prometheus
- GITHUB_STATUS_TARGET_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
- LAST_COMMIT_SHA: ${{ github.event.client_payload.LAST_COMMIT_SHA }}
- GKE_PROJECT_ID: macro-mile-203600
- PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }}
- PROVIDER: gke
- ZONE: europe-west3-a
- steps:
- - name: Update status to pending
- run: >-
- curl -i -X POST
- -H "Authorization: Bearer $GITHUB_TOKEN"
- -H "Content-Type: application/json"
- --data '{"state":"pending","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
- "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"
- - name: Prepare nodepool
- uses: docker://prominfra/funcbench:master
- with:
- entrypoint: "docker_entrypoint"
- args: make deploy
- - name: Delete all resources
- if: always()
- uses: docker://prominfra/funcbench:master
- with:
- entrypoint: "docker_entrypoint"
- args: make clean
- - name: Update status to failure
- if: failure()
- run: >-
- curl -i -X POST
- -H "Authorization: Bearer $GITHUB_TOKEN"
- -H "Content-Type: application/json"
- --data '{"state":"failure","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
- "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"
- - name: Update status to success
- if: success()
- run: >-
- curl -i -X POST
- -H "Authorization: Bearer $GITHUB_TOKEN"
- -H "Content-Type: application/json"
- --data '{"state":"success","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
- "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"
diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml
index dc510e5966..27c09b4187 100644
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -10,18 +10,20 @@ jobs:
steps:
- name: Build Fuzzers
id: build
- uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+ uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master
with:
oss-fuzz-project-name: "prometheus"
dry-run: false
- name: Run Fuzzers
- uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+ uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master
+ # Note: Regularly check for updates to the pinned commit hash at:
+ # https://github.com/google/oss-fuzz/tree/master/infra/cifuzz/actions/run_fuzzers
with:
oss-fuzz-project-name: "prometheus"
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
- uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
+ uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts
diff --git a/.github/workflows/prombench.yml b/.github/workflows/prombench.yml
index 6ee172662b..65d1d71917 100644
--- a/.github/workflows/prombench.yml
+++ b/.github/workflows/prombench.yml
@@ -2,6 +2,8 @@ on:
repository_dispatch:
types: [prombench_start, prombench_restart, prombench_stop]
name: Prombench Workflow
+permissions:
+ contents: read
env:
AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }}
CLUSTER_NAME: test-infra
@@ -15,6 +17,8 @@ env:
PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }}
PROVIDER: gke
RELEASE: ${{ github.event.client_payload.RELEASE }}
+ BENCHMARK_VERSION: ${{ github.event.client_payload.BENCHMARK_VERSION }}
+ BENCHMARK_DIRECTORY: ${{ github.event.client_payload.BENCHMARK_DIRECTORY }}
ZONE: europe-west3-a
jobs:
benchmark_start:
diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml
index 537e9abd84..fa8d2e5abe 100644
--- a/.github/workflows/repo_sync.yml
+++ b/.github/workflows/repo_sync.yml
@@ -13,7 +13,9 @@ jobs:
container:
image: quay.io/prometheus/golang-builder
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ persist-credentials: false
- run: ./scripts/sync_repo_files.sh
env:
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index c82fa87a1e..c2335a8e46 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -21,12 +21,12 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # tag=v4.1.6
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
with:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # tag=v2.3.3
+ uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # tag=v2.4.1
with:
results_file: results.sarif
results_format: sarif
@@ -37,7 +37,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3
+ uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # tag=v4.6.2
with:
name: SARIF file
path: results.sarif
@@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@b611370bb5703a7efb587f9d136a52ea24c5c38c # tag=v3.25.11
+ uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # tag=v3.28.16
with:
sarif_file: results.sarif
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000000..371d92a69a
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,31 @@
+name: Stale Check
+on:
+ workflow_dispatch: {}
+ schedule:
+ - cron: '16 22 * * *'
+permissions:
+ issues: write
+ pull-requests: write
+jobs:
+ stale:
+ if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ # opt out of defaults to avoid marking issues as stale and closing them
+ # https://github.com/actions/stale#days-before-close
+ # https://github.com/actions/stale#days-before-stale
+ days-before-stale: -1
+ days-before-close: -1
+ # Setting it to empty string to skip comments.
+ # https://github.com/actions/stale#stale-pr-message
+ # https://github.com/actions/stale#stale-issue-message
+ stale-pr-message: ''
+ stale-issue-message: ''
+ operations-per-run: 30
+ # override days-before-stale, for only marking the pull requests as stale
+ days-before-pr-stale: 60
+ stale-pr-label: stale
+ exempt-pr-labels: keepalive
diff --git a/.gitignore b/.gitignore
index e85d766b09..0d99305f69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,7 +22,7 @@ benchmark.txt
/documentation/examples/remote_storage/example_write_adapter/example_write_adapter
npm_licenses.tar.bz2
-/web/ui/static/react
+/web/ui/static
/vendor
/.build
diff --git a/.golangci.yml b/.golangci.yml
index e924fe3d5b..1a744b4142 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -1,146 +1,177 @@
-run:
- timeout: 15m
+formatters:
+ enable:
+ - gci
+ - gofumpt
+ - goimports
+ settings:
+ gci:
+ sections:
+ - standard
+ - default
+ - prefix(github.com/prometheus/prometheus)
+ gofumpt:
+ extra-rules: true
+ goimports:
+ local-prefixes:
+ - github.com/prometheus/prometheus
-output:
- sort-results: true
+issues:
+ max-issues-per-linter: 0
+ max-same-issues: 0
linters:
+ # Keep this list sorted alphabetically
enable:
- depguard
- errorlint
+ - exptostd
- gocritic
- godot
- - gofumpt
- - goimports
+ - loggercheck
- misspell
- - nolintlint
+ - nilnesserr
+ # TODO(bwplotka): Enable once https://github.com/golangci/golangci-lint/issues/3228 is fixed.
+ # - nolintlint
- perfsprint
- predeclared
- revive
+ - sloglint
- testifylint
- unconvert
- unused
- usestdlibvars
- whitespace
- - loggercheck
-issues:
- max-same-issues: 0
- exclude-files:
- # Skip autogenerated files.
- - ^.*\.(pb|y)\.go$
- exclude-dirs:
- # Copied it from a different source
- - storage/remote/otlptranslator/prometheusremotewrite
- - storage/remote/otlptranslator/prometheus
- exclude-rules:
- - linters:
- - gocritic
- text: "appendAssign"
- - path: _test.go
- linters:
- - errcheck
- - path: "tsdb/head_wal.go"
- linters:
- - errorlint
- - linters:
- - godot
- source: "^// ==="
- - linters:
- - perfsprint
- text: "fmt.Sprintf can be replaced with string concatenation"
-linters-settings:
- depguard:
+ exclusions:
+ paths:
+ # Skip autogenerated files.
+ - ^.*\.(pb|y)\.go$
rules:
- main:
- deny:
- - pkg: "sync/atomic"
- desc: "Use go.uber.org/atomic instead of sync/atomic"
- - pkg: "github.com/stretchr/testify/assert"
- desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
- - pkg: "github.com/go-kit/kit/log"
- desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
- - pkg: "io/ioutil"
- desc: "Use corresponding 'os' or 'io' functions instead."
- - pkg: "regexp"
- desc: "Use github.com/grafana/regexp instead of regexp"
- - pkg: "github.com/pkg/errors"
- desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors"
- - pkg: "gzip"
- desc: "Use github.com/klauspost/compress instead of gzip"
- - pkg: "zlib"
- desc: "Use github.com/klauspost/compress instead of zlib"
- - pkg: "golang.org/x/exp/slices"
- desc: "Use 'slices' instead."
- errcheck:
- exclude-functions:
- # Don't flag lines such as "io.Copy(io.Discard, resp.Body)".
- - io.Copy
- # The next two are used in HTTP handlers, any error is handled by the server itself.
- - io.WriteString
- - (net/http.ResponseWriter).Write
- # No need to check for errors on server's shutdown.
- - (*net/http.Server).Shutdown
- # Never check for logger errors.
- - (github.com/go-kit/log.Logger).Log
- # Never check for rollback errors as Rollback() is called when a previous error was detected.
- - (github.com/prometheus/prometheus/storage.Appender).Rollback
- goimports:
- local-prefixes: github.com/prometheus/prometheus
- gofumpt:
- extra-rules: true
- perfsprint:
- # Optimizes `fmt.Errorf`.
- errorf: false
- revive:
- # By default, revive will enable only the linting rules that are named in the configuration file.
- # So, it's needed to explicitly set in configuration all required rules.
- # The following configuration enables all the rules from the defaults.toml
- # https://github.com/mgechev/revive/blob/master/defaults.toml
- rules:
- # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
- - name: blank-imports
- - name: context-as-argument
- arguments:
- # allow functions with test or bench signatures
- - allowTypesBefore: "*testing.T,testing.TB"
- - name: context-keys-type
- - name: dot-imports
- # A lot of false positives: incorrectly identifies channel draining as "empty code block".
- # See https://github.com/mgechev/revive/issues/386
- - name: empty-block
- disabled: true
- - name: error-naming
- - name: error-return
- - name: error-strings
- - name: errorf
- - name: exported
- - name: increment-decrement
- - name: indent-error-flow
- - name: package-comments
- - name: range
- - name: receiver-naming
- - name: redefines-builtin-id
- - name: superfluous-else
- - name: time-naming
- - name: unexported-return
- - name: unreachable-code
- - name: unused-parameter
- disabled: true
- - name: var-declaration
- - name: var-naming
- testifylint:
- disable:
- - float-compare
- - go-require
- enable:
- - bool-compare
- - compares
- - empty
- - error-is-as
- - error-nil
- - expected-actual
- - len
- - require-error
- - suite-dont-use-pkg
- - suite-extra-assert-call
+ - linters:
+ - errcheck
+ # Taken from the default exclusions in v1.
+ text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
+ - linters:
+ - govet
+ # We use many Seek methods that do not follow the usual pattern.
+ text: "stdmethods: method Seek.* should have signature Seek"
+ - linters:
+ - revive
+ # We have stopped at some point to write doc comments on exported symbols.
+ # TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now.
+ text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported
+ - linters:
+ - gocritic
+ text: "appendAssign"
+ - linters:
+ - errcheck
+ path: _test.go
+ - linters:
+ - errorlint
+ path: "tsdb/head_wal.go"
+ - linters:
+ - godot
+ source: "^// ==="
+ warn-unused: true
+ settings:
+ depguard:
+ rules:
+ main:
+ deny:
+ - pkg: "sync/atomic"
+ desc: "Use go.uber.org/atomic instead of sync/atomic"
+ - pkg: "github.com/stretchr/testify/assert"
+ desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
+ - pkg: "github.com/go-kit/kit/log"
+ desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
+ - pkg: "io/ioutil"
+ desc: "Use corresponding 'os' or 'io' functions instead."
+ - pkg: "regexp"
+ desc: "Use github.com/grafana/regexp instead of regexp"
+ - pkg: "github.com/pkg/errors"
+ desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors"
+ - pkg: "gzip"
+ desc: "Use github.com/klauspost/compress instead of gzip"
+ - pkg: "zlib"
+ desc: "Use github.com/klauspost/compress instead of zlib"
+ - pkg: "golang.org/x/exp/slices"
+ desc: "Use 'slices' instead."
+ errcheck:
+ exclude-functions:
+ # Don't flag lines such as "io.Copy(io.Discard, resp.Body)".
+ - io.Copy
+ # The next two are used in HTTP handlers, any error is handled by the server itself.
+ - io.WriteString
+ - (net/http.ResponseWriter).Write
+ # No need to check for errors on server's shutdown.
+ - (*net/http.Server).Shutdown
+ # Never check for rollback errors as Rollback() is called when a previous error was detected.
+ - (github.com/prometheus/prometheus/storage.Appender).Rollback
+ perfsprint:
+ # Optimizes even if it requires an int or uint type cast.
+ int-conversion: true
+ # Optimizes into `err.Error()` even if it is only equivalent for non-nil errors.
+ err-error: true
+ # Optimizes `fmt.Errorf`.
+ errorf: true
+ # Optimizes `fmt.Sprintf` with only one argument.
+ sprintf1: true
+ # Optimizes into strings concatenation.
+ strconcat: false
+ revive:
+ # By default, revive will enable only the linting rules that are named in the configuration file.
+ # So, it's needed to explicitly enable all required rules here.
+ rules:
+ # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
+ - name: blank-imports
+ - name: comment-spacings
+ - name: context-as-argument
+ arguments:
+ # Allow functions with test or bench signatures.
+ - allowTypesBefore: '*testing.T,testing.TB'
+ - name: context-keys-type
+ - name: dot-imports
+ - name: early-return
+ arguments:
+ - "preserveScope"
+ # A lot of false positives: incorrectly identifies channel draining as "empty code block".
+ # See https://github.com/mgechev/revive/issues/386
+ - name: empty-block
+ disabled: true
+ - name: error-naming
+ - name: error-return
+ - name: error-strings
+ - name: errorf
+ - name: exported
+ - name: increment-decrement
+ - name: indent-error-flow
+ arguments:
+ - "preserveScope"
+ - name: package-comments
+ # TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them.
+ disabled: true
+ - name: range
+ - name: receiver-naming
+ - name: redefines-builtin-id
+ - name: superfluous-else
+ arguments:
+ - "preserveScope"
+ - name: time-naming
+ - name: unexported-return
+ - name: unreachable-code
+ - name: unused-parameter
+ - name: var-declaration
+ - name: var-naming
+ testifylint:
+ disable:
+ - float-compare
+ - go-require
+ enable-all: true
+
+output:
+ show-stats: false
+
+run:
+ timeout: 15m
+
+version: "2"
diff --git a/.promu.yml b/.promu.yml
index 0aa51d6d31..d16bceeed9 100644
--- a/.promu.yml
+++ b/.promu.yml
@@ -1,7 +1,7 @@
go:
# Whenever the Go version is updated here,
# .github/workflows should also be updated.
- version: 1.22
+ version: 1.24
repository:
path: github.com/prometheus/prometheus
build:
@@ -14,10 +14,8 @@ build:
all:
- netgo
- builtinassets
- - stringlabels
windows:
- builtinassets
- - stringlabels
ldflags: |
-X github.com/prometheus/common/version.Version={{.Version}}
-X github.com/prometheus/common/version.Revision={{.Revision}}
@@ -28,8 +26,6 @@ tarball:
# Whenever there are new files to include in the tarball,
# remember to make sure the new files will be generated after `make build`.
files:
- - consoles
- - console_libraries
- documentation/examples/prometheus.yml
- LICENSE
- NOTICE
diff --git a/.yamllint b/.yamllint
index 1859cb624b..8d09c375fd 100644
--- a/.yamllint
+++ b/.yamllint
@@ -1,7 +1,7 @@
---
extends: default
ignore: |
- ui/react-app/node_modules
+ **/node_modules
rules:
braces:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d5a91e9009..793a625bd4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,320 @@
# Changelog
-## unreleased
+## main / unreleased
+
+* [FEATURE] OTLP receiver: Support promoting OTel scope name/version/schema URL/attributes as metric labels, enable via configuration parameter `otlp.promote_scope_metadata`. #16730 #16760
+
+## 3.4.2 / 2025-06-26
+
+* [BUGFIX] OTLP receiver: Fix default configuration not being respected if the `otlp:` block is unset in the config file. #16693
+
+## 3.4.1 / 2025-05-31
+
+* [BUGFIX] Parser: Add reproducer for a dangling-reference issue in parsers. #16633
+
+## 3.4.0 / 2025-05-17
+
+* [CHANGE] Config: Make setting out-of-order native histograms feature (`--enable-feature=ooo-native-histograms`) a no-op. Out-of-order native histograms are now always enabled when `out_of_order_time_window` is greater than zero and `--enable-feature=native-histograms` is set. #16207
+* [FEATURE] OTLP translate: Add feature flag for optionally translating OTel explicit bucket histograms into native histograms with custom buckets. #15850
+* [FEATURE] OTLP translate: Add option to receive OTLP metrics without translating names or attributes. #16441
+* [FEATURE] PromQL: allow arithmetic operations in durations in PromQL parser. #16249
+* [FEATURE] OTLP receiver: Add primitive support for ingesting OTLP delta metrics as-is. #16360
+* [ENHANCEMENT] PromQL: histogram_fraction for bucket histograms. #16095
+* [ENHANCEMENT] TSDB: add `prometheus_tsdb_wal_replay_unknown_refs_total` and `prometheus_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during WAL/WBL replay. #16166
+* [ENHANCEMENT] Scraping: Add config option for escaping scheme request. #16066
+* [ENHANCEMENT] Config: Add global config option for convert_classic_histograms_to_nhcb. #16226
+* [ENHANCEMENT] Alerting: make batch size configurable (`--alertmanager.notification-batch-size`). #16254
+* [PERF] Kubernetes SD: make endpointSlice discovery more efficient. #16433
+* [BUGFIX] Config: Fix auto-reload on changes to rule and scrape config files. #16340
+* [BUGFIX] Scraping: Skip native histogram series if ingestion is disabled. #16218
+* [BUGFIX] TSDB: Handle metadata/tombstones/exemplars for duplicate series during WAL replay. #16231
+* [BUGFIX] TSDB: Avoid processing exemplars outside the valid time range during WAL replay. #16242
+* [BUGFIX] Promtool: Add feature flags for PromQL features. #16443
+* [BUGFIX] Rules: correct logging of alert name & template data. #15093
+* [BUGFIX] PromQL: Use arithmetic mean for `histogram_stddev()` and `histogram_stdvar()`. #16444
+
+## 3.3.0 / 2025-04-15
+
+* [FEATURE] PromQL: Implement `idelta()` and `irate()` for native histograms. #15853
+* [ENHANCEMENT] Scaleway SD: Add `__meta_scaleway_instance_public_ipv4_addresses` and `__meta_scaleway_instance_public_ipv6_addresses` labels. #14228
+* [ENHANCEMENT] TSDB: Reduce locking while reloading blocks. #12920
+* [ENHANCEMENT] PromQL: Allow UTF-8 labels in `label_replace()`. #15974
+* [ENHANCEMENT] Promtool: `tsdb create-blocks-from openmetrics` can now read from a Pipe. #16011
+* [ENHANCEMENT] Rules: Add support for anchors and aliases in rule files. #14957
+* [ENHANCEMENT] Dockerfile: Make `/prometheus` writable. #16073
+* [ENHANCEMENT] API: Include scrape pool name for dropped targets in `/api/v1/targets`. #16085
+* [ENHANCEMENT] UI: Improve time formatting and copying of selectors. #15999 #16165
+* [ENHANCEMENT] UI: Bring back vertical grid lines and graph legend series toggling instructions. #16163 #16164
+* [ENHANCEMENT] Mixin: The `cluster` label can be customized using `clusterLabel`. #15826
+* [PERF] TSDB: Optimize some operations on head chunks by taking shortcuts. #12659
+* [PERF] TSDB & Agent: Reduce memory footprint during WL replay. #15778
+* [PERF] Remote-Write: Reduce memory footprint during WAL replay. #16197
+* [PERF] API: Reduce memory footprint during header parsing. #16001
+* [PERF] Rules: Improve dependency evaluation, enabling better concurrency. #16039
+* [PERF] Scraping: Improve scraping performance for native histograms. #15731
+* [PERF] Scraping: Improve parsing of created timestamps. #16072
+* [BUGFIX] Scraping: Bump cache iteration after error to avoid false duplicate detections. #16174
+* [BUGFIX] Scraping: Skip native histograms series when ingestion is disabled. #16218
+* [BUGFIX] PromQL: Fix counter reset detection for native histograms. #15902 #15987
+* [BUGFIX] PromQL: Fix inconsistent behavior with an empty range. #15970
+* [BUGFIX] PromQL: Fix inconsistent annotation in `quantile_over_time()`. #16018
+* [BUGFIX] PromQL: Prevent `label_join()` from producing duplicates. #15975
+* [BUGFIX] PromQL: Ignore native histograms in `scalar()`, `sort()` and `sort_desc()`. #15964
+* [BUGFIX] PromQL: Fix annotations for binary operations between incompatible native histograms. #15895
+* [BUGFIX] Alerting: Consider alert relabeling when deciding whether alerts are dropped. #15979
+* [BUGFIX] Config: Set `GoGC` to the default value in case of an empty configuration. #16052
+* [BUGFIX] TSDB: Fix unknown series errors and potential data loss during WAL replay when inactive series are removed from the head and reappear before the next WAL checkpoint. #16060
+* [BUGFIX] Scaleway SD: The public IP will no longer be set to `__meta_scaleway_instance_public_ipv4` if it is an IPv6 address. #14228
+* [BUGFIX] UI: Display the correct value of Alerting rules' `keep_firing_for`. #16211
+
+## 3.2.1 / 2025-02-25
+
+* [BUGFIX] Don't send `Accept` header `escape=allow-utf-8` when `metric_name_validation_scheme: legacy` is configured. #16061
+
+## 3.2.0 / 2025-02-17
+
+* [CHANGE] relabel: Replace actions can now use UTF-8 characters in the `targetLabel` field. Note that `$` or `${}` will be expanded. This also applies to the `replacement` field of the `LabelMap` action. #15851
+* [CHANGE] rulefmt: Rule names can use UTF-8 characters, except `{` and `}` characters (due to common mistake checks). #15851
+* [FEATURE] remote/otlp: Add feature flag `otlp-deltatocumulative` to support conversion from delta to cumulative. #15165
+* [ENHANCEMENT] openstack SD: Discover Octavia loadbalancers. #15539
+* [ENHANCEMENT] scrape: Add metadata for automatic metrics to WAL for `metadata-wal-records` feature. #15837
+* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719
+* [ENHANCEMENT] promtool: Add --ignore-unknown-fields option. #15706
+* [ENHANCEMENT] ui: Make the "hide empty rules" setting persistent. #15807
+* [ENHANCEMENT] web/api: Add a limit parameter to `/query` and `/query_range`. #15552
+* [ENHANCEMENT] api: Add fields Node and ServerTime to `/status`. #15784
+* [PERF] Scraping: defer computing labels for dropped targets until they are needed by the UI. #15261
+* [BUGFIX] remotewrite2: Fix invalid metadata bug for metrics without metadata. #15829
+* [BUGFIX] remotewrite2: Fix the unit field propagation. #15825
+* [BUGFIX] scrape: Fix WAL metadata for histograms and summaries. #15832
+* [BUGFIX] ui: Merge duplicate "Alerts page settings" sections. #15810
+* [BUGFIX] PromQL: Fix `` functions with histograms. #15711
+
+## 3.1.0 / 2025-01-02
+
+ * [SECURITY] upgrade golang.org/x/crypto to address reported CVE-2024-45337. #15691
+ * [CHANGE] Notifier: Increment prometheus_notifications_errors_total by the number of affected alerts rather than per batch. #15428
+ * [CHANGE] API: list rules field "groupNextToken:omitempty" renamed to "groupNextToken". #15400
+ * [ENHANCEMENT] OTLP translate: keep identifying attributes in target_info. #15448
+ * [ENHANCEMENT] Paginate rule groups, add infinite scroll to rules within groups. #15677
+ * [ENHANCEMENT] TSDB: Improve calculation of space used by labels. #13880
+ * [ENHANCEMENT] Rules: new metric rule_group_last_rule_duration_sum_seconds. #15672
+ * [ENHANCEMENT] Observability: Export 'go_sync_mutex_wait_total_seconds_total' metric. #15339
 * [ENHANCEMENT] Remote-Write: optionally use a DNS resolver that picks a random IP. #15329
+ * [PERF] Optimize `l=~".+"` matcher. #15474, #15684
 * [PERF] TSDB: Cache all symbols for compaction. #15455
+ * [PERF] TSDB: MemPostings: keep a map of label values slices. #15426
+ * [PERF] Remote-Write: Remove interning hook. #15456
+ * [PERF] Scrape: optimize string manipulation for experimental native histograms with custom buckets. #15453
+ * [PERF] TSDB: reduce memory allocations. #15465, #15427
+ * [PERF] Storage: Implement limit in mergeGenericQuerier. #14489
+ * [PERF] TSDB: Optimize inverse matching. #14144
+ * [PERF] Regex: use stack memory for lowercase copy of string. #15210
+ * [PERF] TSDB: When deleting from postings index, pause to unlock and let readers read. #15242
+ * [BUGFIX] Main: Avoid possible segfault at exit. (#15724)
+ * [BUGFIX] Rules: Do not run rules concurrently if uncertain about dependencies. #15560
+ * [BUGFIX] PromQL: Adds test for `absent`, `absent_over_time` and `deriv` func with histograms. #15667
+ * [BUGFIX] PromQL: Fix various bugs related to quoting UTF-8 characters. #15531
+ * [BUGFIX] Scrape: fix nil panic after scrape loop reload. #15563
+ * [BUGFIX] Remote-write: fix panic on repeated log message. #15562
+ * [BUGFIX] Scrape: reload would ignore always_scrape_classic_histograms and convert_classic_histograms_to_nhcb configs. #15489
+ * [BUGFIX] TSDB: fix data corruption in experimental native histograms. #15482
+ * [BUGFIX] PromQL: Ignore histograms in all time related functions. #15479
+ * [BUGFIX] OTLP receiver: Convert metric metadata. #15416
+ * [BUGFIX] PromQL: Fix `resets` function for histograms. #15527
+ * [BUGFIX] PromQL: Fix behaviour of `changes()` for mix of histograms and floats. #15469
+ * [BUGFIX] PromQL: Fix behaviour of some aggregations with histograms. #15432
+ * [BUGFIX] allow quoted exemplar keys in openmetrics text format. #15260
+ * [BUGFIX] TSDB: fixes for rare conditions when loading write-behind-log (WBL). #15380
+ * [BUGFIX] `round()` function did not remove `__name__` label. #15250
+ * [BUGFIX] Promtool: analyze block shows metric name with 0 cardinality. #15438
+ * [BUGFIX] PromQL: Fix `count_values` for histograms. #15422
+ * [BUGFIX] PromQL: fix issues with comparison binary operations with `bool` modifier and native histograms. #15413
+ * [BUGFIX] PromQL: fix incorrect "native histogram ignored in aggregation" annotations. #15414
+ * [BUGFIX] PromQL: Corrects the behaviour of some operator and aggregators with Native Histograms. #15245
+ * [BUGFIX] TSDB: Always return unknown hint for first sample in non-gauge histogram chunk. #15343
+ * [BUGFIX] PromQL: Clamp functions: Ignore any points with native histograms. #15169
+ * [BUGFIX] TSDB: Fix race on stale values in headAppender. #15322
+ * [BUGFIX] UI: Fix selector / series formatting for empty metric names. #15340
+ * [BUGFIX] OTLP receiver: Allow colons in non-standard units. #15710
+
+## 3.0.1 / 2024-11-28
+
+The first bug fix release for Prometheus 3.
+
+* [BUGFIX] Promql: Make subqueries left open. #15431
+* [BUGFIX] Fix memory leak when query log is enabled. #15434
+* [BUGFIX] Support utf8 names on /v1/label/:name/values endpoint. #15399
+
+## 3.0.0 / 2024-11-14
+
+This release includes new features such as a brand-new UI and UTF-8 support enabled by default. As this marks the first new major version in seven years, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments; the full list can be found below. For users who want to upgrade, we recommend reading through our [migration guide](https://prometheus.io/docs/prometheus/3.0/migration/).
+
+* [CHANGE] Set the `GOMAXPROCS` variable automatically to match the Linux CPU quota. Use `--no-auto-gomaxprocs` to disable it. The `auto-gomaxprocs` feature flag was removed. #15376
+* [CHANGE] Set the `GOMEMLIMIT` variable automatically to match the Linux container memory limit. Use `--no-auto-gomemlimit` to disable it. The `auto-gomemlimit` feature flag was removed. #15373
+* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136
+* [CHANGE] Remote-write: default enable_http2 to false. #15219
+* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164
+* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178
+* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657
+* [CHANGE] Disallow configuring AM with the v1 api. #13883
+* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505
+* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930
+* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894
+* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160
+* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906
+* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738
+* [CHANGE] Float literals and time durations being the same is now a stable feature. #15111
+* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872
+* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904
+* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365
+* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365
+* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705, #15258
+* [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807
+* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770
+* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747
+* [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
+* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
+* [FEATURE] OTLP receiver: Ability to skip UTF-8 normalization using `otlp.translation_strategy = NoUTF8EscapingWithSuffixes` configuration option. #15384
+* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769, #15011
+* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710
+* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196
+* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694
+* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096
+* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082
+* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677
+* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546
+* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909
+* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929
+* [ENHANCEMENT] Consul SD: Support catalog filters. #11224
+* [ENHANCEMENT] Move the Alertmanager discovery page from "Monitoring status" to "Server status". #14875
+* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975
+* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932
+* [PERF] TSDB: Grow postings by doubling. #14721
+* [PERF] Relabeling: Optimize adding a constant label pair. #12180
+* [BUGFIX] Scraping: Don't log errors on empty scrapes. #15357
+* [BUGFIX] UI: fix selector / series formatting for empty metric names. #15341
+* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941
+* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941
+* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251
+* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095
+* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
+* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854
+* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884
+* [BUGFIX] PromQL: Unary negation of native histograms. #14821
+* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025
+* [BUGFIX] Autoreload: Reload invalid yaml files. #14947
+* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
+
+## 2.53.5 / 2025-06-30
+
+* [ENHANCEMENT] TSDB: Add backward compatibility with the upcoming TSDB block index v3 #16762
+* [BUGFIX] Top-level: Update GOGC before loading TSDB #16521
+
+## 2.53.4 / 2025-03-18
+
+* [BUGFIX] Runtime: fix GOGC being set to 0 when installed with an empty prometheus.yml file, resulting in high CPU usage. #16090
+* [BUGFIX] Scrape: fix dropping valid metrics after previous scrape failed. #16220
+
+## 2.53.3 / 2024-11-04
+
+* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685, #14740
+
+## 2.53.2 / 2024-08-09
+
+Fix a bug where Prometheus would crash with a segmentation fault if a remote-read
+request accessed a block on disk at about the same time as TSDB created a new block.
+
+* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515,#14523
+
+## 2.55.1 / 2024-11-04
+
+* [BUGFIX] `round()` function did not remove `__name__` label. #15250
+
+## 2.55.0 / 2024-10-22
+
+* [FEATURE] PromQL: Add experimental `info` function. #14495
+* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
+* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
+* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
+* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
+* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200
+* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
+* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
+* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
+* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus instances, to randomize compaction timing. #12532
+* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
+* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
+* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
+* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
+* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
+* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985
+* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
+* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
+* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
+* [ENHANCEMENT] API: Support multiple listening addresses. #14665
+* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
+* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
+* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
+* [BUGFIX] PromQL: make sort_by_label stable. #14985
+* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
+* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
+* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810
+* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766
+* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716
+* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
+* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042
+
+## 2.54.1 / 2024-08-27
+
+* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps (mixing samples of the same series with and without timestamps is still rejected). #14685
+* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654
+* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538
+* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514
+* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605
+
+## 2.54.0 / 2024-08-09
+
+Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
+This is experimental at this time and may still change.
+Remote-write v2 is enabled by default, but can be disabled via feature-flag `web.remote-write-receiver.accepted-protobuf-messages`.
+
+* [CHANGE] Remote-Write: `highest_timestamp_in_seconds` and `queue_highest_sent_timestamp_seconds` metrics now initialized to 0. #14437
+* [CHANGE] API: Split warnings from info annotations in API response. #14327
+* [FEATURE] Remote-Write: Version 2.0 experimental, plus metadata in WAL via feature flag `metadata-wal-records` (defaults on). #14395,#14427,#14444
+* [FEATURE] PromQL: add limitk() and limit_ratio() aggregation operators. #12503
+* [ENHANCEMENT] PromQL: Accept underscores in literal numbers, e.g. 1_000_000 for 1 million. #12821
+* [ENHANCEMENT] PromQL: float literal numbers and durations are now interchangeable (experimental). Example: `time() - my_timestamp > 10m`. #9138
+* [ENHANCEMENT] PromQL: use Kahan summation for sum(). #14074,#14362
+* [ENHANCEMENT] PromQL (experimental native histograms): Optimize `histogram_count` and `histogram_sum` functions. #14097
+* [ENHANCEMENT] TSDB: Better support for out-of-order experimental native histogram samples. #14438
+* [ENHANCEMENT] TSDB: Optimise seek within index. #14393
+* [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307
+* [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286
+* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584
+* [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368
+* [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173
+* [ENHANCEMENT] AWS SD: expose primary IPv6 addresses as `__meta_ec2_primary_ipv6_addresses`. #14156
+* [ENHANCEMENT] Docker SD: add MatchFirstNetwork for containers with multiple networks. #10490
+* [ENHANCEMENT] OpenStack SD: Use `flavor.original_name` if available. #14312
+* [ENHANCEMENT] UI (experimental native histograms): more accurate representation. #13680,#14430
+* [ENHANCEMENT] Agent: `out_of_order_time_window` config option now applies to agent. #14094
+* [ENHANCEMENT] Notifier: Send any outstanding Alertmanager notifications when shutting down. #14290
+* [ENHANCEMENT] Rules: Add label-matcher support to Rules API. #10194
+* [ENHANCEMENT] HTTP API: Add url to message logged on error while sending response. #14209
+* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584
+* [BUGFIX] CLI: escape `|` characters when generating docs. #14420
+* [BUGFIX] PromQL (experimental native histograms): Fix some binary operators between native histogram values. #14454
+* [BUGFIX] TSDB: LabelNames API could fail during compaction. #14279
+* [BUGFIX] TSDB: Fix rare issue where pending OOO read can be left dangling if creating querier fails. #14341
+* [BUGFIX] TSDB: fix check for context cancellation in LabelNamesFor. #14302
+* [BUGFIX] Rules: Fix rare panic on reload. #14366
+* [BUGFIX] Config: In YAML marshalling, do not output a regexp field if it was never set. #14004
+* [BUGFIX] Remote-Write: reject samples with future timestamps. #14304
+* [BUGFIX] Remote-Write: Fix data corruption in remote write if max_sample_age is applied. #14078
+* [BUGFIX] Notifier: Fix Alertmanager discovery not updating under heavy load. #14174
+* [BUGFIX] Regexes: some Unicode characters were not matched by case-insensitive comparison. #14170,#14299
+* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515
## 2.53.1 / 2024-07-10
@@ -35,6 +349,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
## 2.52.0 / 2024-05-07
* [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633
+* [CHANGE] Scrape: Multiple samples (even with different timestamps) are treated as duplicates during one scrape.
* [FEATURE] Kubernetes SD: Add a new metric `prometheus_sd_kubernetes_failures_total` to track failed requests to Kubernetes API. #13554
* [FEATURE] Kubernetes SD: Add node and zone metadata labels when using the endpointslice role. #13935
* [FEATURE] Azure SD/Remote Write: Allow usage of Azure authorization SDK. #13099
@@ -48,7 +363,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
* [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
* [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
* [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
-* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
+* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
* [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
* [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@@ -585,7 +900,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
## 2.33.0 / 2022-01-29
-* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
+* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
* [CHANGE] Web: Promote remote-write-receiver to stable. #10119
* [FEATURE] Config: Add `stripPort` template function. #10002
* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@@ -822,7 +1137,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
* [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
* [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
* [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790
-* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723
+* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723
* [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737
* [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766
@@ -1748,7 +2063,7 @@ information, read the announcement blog post and migration guide.
## 1.7.0 / 2017-06-06
* [CHANGE] Compress remote storage requests and responses with unframed/raw snappy.
-* [CHANGE] Properly ellide secrets in config.
+* [CHANGE] Properly elide secrets in config.
* [FEATURE] Add OpenStack service discovery.
* [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces.
* [FEATURE] Add metric for discovered number of Alertmanagers.
diff --git a/Dockerfile b/Dockerfile
index b47f77dcd6..31e863d8a0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,27 +2,23 @@ ARG ARCH="amd64"
ARG OS="linux"
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
LABEL maintainer="The Prometheus Authors "
+LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus"
ARG ARCH="amd64"
ARG OS="linux"
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
-COPY console_libraries/ /usr/share/prometheus/console_libraries/
-COPY consoles/ /usr/share/prometheus/consoles/
COPY LICENSE /LICENSE
COPY NOTICE /NOTICE
COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2
WORKDIR /prometheus
-RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ && \
- chown -R nobody:nobody /etc/prometheus /prometheus
+RUN chown -R nobody:nobody /etc/prometheus /prometheus && chmod g+w /prometheus
USER nobody
EXPOSE 9090
VOLUME [ "/prometheus" ]
ENTRYPOINT [ "/bin/prometheus" ]
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
- "--storage.tsdb.path=/prometheus", \
- "--web.console.libraries=/usr/share/prometheus/console_libraries", \
- "--web.console.templates=/usr/share/prometheus/consoles" ]
+ "--storage.tsdb.path=/prometheus" ]
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
index 3661ddaa0a..8d10a8fbca 100644
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -2,7 +2,6 @@
General maintainers:
* Bryan Boreham (bjboreham@gmail.com / @bboreham)
-* Levi Harrison (levi@leviharrison.dev / @LeviHarrison)
* Ayoub Mrini (ayoubmrini424@gmail.com / @machine424)
* Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie)
@@ -10,16 +9,17 @@ Maintainers for specific parts of the codebase:
* `cmd`
* `promtool`: David Leadbeater ( / @dgl)
* `discovery`
+ * `azure`: Jan-Otto Kröpke ( / @jkroepke)
* `k8s`: Frederic Branczyk ( / @brancz)
+ * `stackit`: Jan-Otto Kröpke ( / @jkroepke)
* `documentation`
* `prometheus-mixin`: Matthias Loibl ( / @metalmatze)
-* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7),
+* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7),
George Krajcsovits ( / @krajorama)
* `storage`
* `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank)
- * `otlptranslator`: Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez)
+ * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez)
* `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez)
- * `agent`: Robert Fratto ( / @rfratto)
* `web`
* `ui`: Julius Volz ( / @juliusv)
* `module`: Augustin Husson ( @nexucis)
diff --git a/Makefile b/Makefile
index f2bb3fcb7a..0b5935de00 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,11 @@ include Makefile.common
DOCKER_IMAGE_NAME ?= prometheus
+# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set
+ifdef PREBUILT_ASSETS_STATIC_DIR
+ SKIP_UI_BUILD = true
+endif
+
.PHONY: update-npm-deps
update-npm-deps:
@echo ">> updating npm dependencies"
@@ -42,13 +47,17 @@ upgrade-npm-deps:
.PHONY: ui-bump-version
ui-bump-version:
- version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}"
+ version=$$(./scripts/get_module_version.sh) && ./scripts/ui_release.sh --bump-version "$${version}"
cd web/ui && npm install
git add "./web/ui/package-lock.json" "./**/package.json"
.PHONY: ui-install
ui-install:
cd $(UI_PATH) && npm install
+ # The old React app has been separated from the npm workspaces setup to avoid
+ # issues with conflicting dependencies. This is a temporary solution until the
+ # new Mantine-based UI is fully integrated and the old app can be removed.
+ cd $(UI_PATH)/react-app && npm install
.PHONY: ui-build
ui-build:
@@ -65,10 +74,30 @@ ui-test:
.PHONY: ui-lint
ui-lint:
cd $(UI_PATH) && npm run lint
+ # The old React app has been separated from the npm workspaces setup to avoid
+ # issues with conflicting dependencies. This is a temporary solution until the
+ # new Mantine-based UI is fully integrated and the old app can be removed.
+ cd $(UI_PATH)/react-app && npm run lint
.PHONY: assets
+ifndef SKIP_UI_BUILD
assets: ui-install ui-build
+.PHONY: npm_licenses
+npm_licenses: ui-install
+ @echo ">> bundling npm licenses"
+ rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
+ ln -s . npm_licenses
+ find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
+ rm -f npm_licenses
+else
+assets:
+ @echo '>> skipping assets build, pre-built assets provided'
+
+npm_licenses:
+ @echo '>> skipping assets npm licenses, pre-built assets provided'
+endif
+
.PHONY: assets-compress
assets-compress: assets
@echo '>> compressing assets'
@@ -117,14 +146,6 @@ else
test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
-.PHONY: npm_licenses
-npm_licenses: ui-install
- @echo ">> bundling npm licenses"
- rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
- ln -s . npm_licenses
- find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
- rm -f npm_licenses
-
.PHONY: tarball
tarball: npm_licenses common-tarball
diff --git a/Makefile.common b/Makefile.common
index e3da72ab47..4de21512ff 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -61,7 +61,8 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.59.1
+GOLANGCI_LINT_VERSION ?= v2.1.5
+GOLANGCI_FMT_OPTS ?=
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
@@ -156,9 +157,13 @@ $(GOTEST_DIR):
@mkdir -p $@
.PHONY: common-format
-common-format:
+common-format: $(GOLANGCI_LINT)
@echo ">> formatting code"
$(GO) fmt $(pkgs)
+ifdef GOLANGCI_LINT
+ @echo ">> formatting code with golangci-lint"
+ $(GOLANGCI_LINT) fmt $(GOLANGCI_FMT_OPTS)
+endif
.PHONY: common-vet
common-vet:
@@ -248,8 +253,8 @@ $(PROMU):
cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu
rm -r $(PROMU_TMP)
-.PHONY: proto
-proto:
+.PHONY: common-proto
+common-proto:
@echo ">> generating code from proto files"
@./scripts/genproto.sh
@@ -275,3 +280,9 @@ $(1)_precheck:
exit 1; \
fi
endef
+
+govulncheck: install-govulncheck
+ govulncheck ./...
+
+install-govulncheck:
+ command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest
diff --git a/README.md b/README.md
index cd14ed2ecb..26262734c0 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,10 @@ examples and guides.
[][hub]
[](https://goreportcard.com/report/github.com/prometheus/prometheus)
[](https://bestpractices.coreinfrastructure.org/projects/486)
+[](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
+[](https://clomonitor.io/projects/cncf/prometheus)
[](https://gitpod.io/#https://github.com/prometheus/prometheus)
[](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus)
-[](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
@@ -66,9 +67,9 @@ Prometheus will now be reachable at .
To build Prometheus from source code, you need:
-* Go [version 1.17 or greater](https://golang.org/doc/install).
-* NodeJS [version 16 or greater](https://nodejs.org/).
-* npm [version 7 or greater](https://www.npmjs.com/).
+* Go [version 1.22 or greater](https://golang.org/doc/install).
+* NodeJS [version 22 or greater](https://nodejs.org/).
+* npm [version 8 or greater](https://www.npmjs.com/).
Start by cloning the repository:
@@ -114,7 +115,7 @@ The Makefile provides several targets:
Prometheus is bundled with many service discovery plugins.
When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
-file to disable some service discoveries. The file is a yaml-formated list of go
+file to disable some service discoveries. The file is a yaml-formatted list of go
import paths that will be built into the Prometheus binary.
After you have changed the file, you
@@ -129,7 +130,6 @@ always, be extra careful when loading third party code.
### Building the Docker image
-The `make docker` target is designed for use in our CI system.
You can build a docker image locally with the following commands:
```bash
@@ -139,6 +139,9 @@ make npm_licenses
make common-docker-amd64
```
+The `make docker` target is intended only for use in our CI system and will not
+produce a fully working image when run locally.
+
## Using Prometheus as a Go Library
### Remote Write
@@ -157,8 +160,19 @@ This is experimental.
### Prometheus code base
In order to comply with [go mod](https://go.dev/ref/mod#versions) rules,
-Prometheus release number do not exactly match Go module releases. For the
-Prometheus v2.y.z releases, we are publishing equivalent v0.y.z tags.
+Prometheus release numbers do not exactly match Go module releases.
+
+For the
+Prometheus v3.y.z releases, we are publishing equivalent v0.3y.z tags. The y in v0.3y.z is always padded to two digits, with a leading zero if needed.
+
+Therefore, a user who wants to use Prometheus v3.0.0 as a library could do:
+
+```shell
+go get github.com/prometheus/prometheus@v0.300.0
+```
+
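+After fetching the module, importing it from Go code works as usual. Below is a minimal sketch, assuming the `promql/parser` package that this repository provides (the expression and the printed output are illustrative only):
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/prometheus/prometheus/promql/parser"
+)
+
+func main() {
+	// Parse a PromQL expression using the Prometheus module.
+	expr, err := parser.ParseExpr(`rate(http_requests_total{job="api"}[5m])`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Print the resulting value type and the normalized expression.
+	fmt.Println(expr.Type(), expr.String())
+}
+```
+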
+For the
+Prometheus v2.y.z releases, we published the equivalent v0.y.z tags.
Therefore, a user who wants to use Prometheus v2.35.0 as a library could do:
@@ -176,7 +190,7 @@ For more information on building, running, and developing on the React-based UI,
## More information
-* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v2.x.y will be displayed as v0.x.y.
+* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v3.y.z will be displayed as v0.3y.z (the y in v0.3y.z is always padded to two digits, with a leading zero if needed), while v2.y.z will be displayed as v0.y.z.
* See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels.
## Contributing
diff --git a/RELEASE.md b/RELEASE.md
index 0d3f7456cd..a7032bd95e 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -5,60 +5,17 @@ This page describes the release process and the currently planned schedule for u
## Release schedule
Release cadence of first pre-releases being cut is 6 weeks.
+Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/release-2.55/RELEASE.md) for the v2 release series schedule.
-| release series | date of first pre-release (year-month-day) | release shepherd |
-|----------------|--------------------------------------------|---------------------------------------------|
-| v2.4 | 2018-09-06 | Goutham Veeramachaneni (GitHub: @gouthamve) |
-| v2.5 | 2018-10-24 | Frederic Branczyk (GitHub: @brancz) |
-| v2.6 | 2018-12-05 | Simon Pasquier (GitHub: @simonpasquier) |
-| v2.7 | 2019-01-16 | Goutham Veeramachaneni (GitHub: @gouthamve) |
-| v2.8 | 2019-02-27 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.9 | 2019-04-10 | Brian Brazil (GitHub: @brian-brazil) |
-| v2.10 | 2019-05-22 | Björn Rabenstein (GitHub: @beorn7) |
-| v2.11 | 2019-07-03 | Frederic Branczyk (GitHub: @brancz) |
-| v2.12 | 2019-08-14 | Julius Volz (GitHub: @juliusv) |
-| v2.13 | 2019-09-25 | Krasi Georgiev (GitHub: @krasi-georgiev) |
-| v2.14 | 2019-11-06 | Chris Marchbanks (GitHub: @csmarchbanks) |
-| v2.15 | 2019-12-18 | Bartek Plotka (GitHub: @bwplotka) |
-| v2.16 | 2020-01-29 | Callum Styan (GitHub: @cstyan) |
-| v2.17 | 2020-03-11 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.18 | 2020-04-22 | Bartek Plotka (GitHub: @bwplotka) |
-| v2.19 | 2020-06-03 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.20 | 2020-07-15 | Björn Rabenstein (GitHub: @beorn7) |
-| v2.21 | 2020-08-26 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.22 | 2020-10-07 | Frederic Branczyk (GitHub: @brancz) |
-| v2.23 | 2020-11-18 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.24 | 2020-12-30 | Björn Rabenstein (GitHub: @beorn7) |
-| v2.25 | 2021-02-10 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.26 | 2021-03-24 | Bartek Plotka (GitHub: @bwplotka) |
-| v2.27 | 2021-05-05 | Chris Marchbanks (GitHub: @csmarchbanks) |
-| v2.28 | 2021-06-16 | Julius Volz (GitHub: @juliusv) |
-| v2.29 | 2021-07-28 | Frederic Branczyk (GitHub: @brancz) |
-| v2.30 | 2021-09-08 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.31 | 2021-10-20 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.32 | 2021-12-01 | Julius Volz (GitHub: @juliusv) |
-| v2.33 | 2022-01-12 | Björn Rabenstein (GitHub: @beorn7) |
-| v2.34 | 2022-02-23 | Chris Marchbanks (GitHub: @csmarchbanks) |
-| v2.35 | 2022-04-06 | Augustin Husson (GitHub: @nexucis) |
-| v2.36 | 2022-05-18 | Matthias Loibl (GitHub: @metalmatze) |
-| v2.37 LTS | 2022-06-29 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.38 | 2022-08-10 | Julius Volz (GitHub: @juliusv) |
-| v2.39 | 2022-09-21 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.40 | 2022-11-02 | Ganesh Vernekar (GitHub: @codesome) |
-| v2.41 | 2022-12-14 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.42 | 2023-01-25 | Kemal Akkoyun (GitHub: @kakkoyun) |
-| v2.43 | 2023-03-08 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.44 | 2023-04-19 | Bryan Boreham (GitHub: @bboreham) |
-| v2.45 LTS | 2023-05-31 | Jesus Vazquez (Github: @jesusvazquez) |
-| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) |
-| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
-| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
-| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
-| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) |
-| v2.51 | 2024-03-07 | Bryan Boreham (GitHub: @bboreham) |
-| v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) |
-| v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
-| v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) |
+| release series | date of first pre-release (year-month-day) | release shepherd |
+|----------------|--------------------------------------------|------------------------------------|
+| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) |
+| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) |
+| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) |
+| v3.3 | 2025-03-11 | Ayoub Mrini (Github: @machine424) |
+| v3.4 | 2025-04-29 | Jan-Otto Kröpke (Github: @jkroepke)|
+| v3.5 LTS | 2025-06-03 | Bryan Boreham (GitHub: @bboreham) |
+| v3.6 | 2025-07-15 | **volunteer welcome** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@@ -180,19 +137,7 @@ git tag -s "${tag}" -m "${tag}"
git push origin "${tag}"
```
-Go modules versioning requires strict use of semver. Because we do not commit to
-avoid code-level breaking changes for the libraries between minor releases of
-the Prometheus server, we use major version zero releases for the libraries.
-
-Tag the new library release via the following commands:
-
-```bash
-tag="v$(sed s/2/0/ < VERSION)"
-git tag -s "${tag}" -m "${tag}"
-git push origin "${tag}"
-```
-
-Optionally, you can use this handy `.gitconfig` alias.
+Alternatively, you can use this handy `.gitconfig` alias.
```ini
[alias]
@@ -203,12 +148,27 @@ Then release with `git tag-release`.
Signing a tag with a GPG key is appreciated, but in case you can't add a GPG key to your GitHub account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag with the `-a` flag of the `git tag` command to only annotate the tag without signing.
-Once a tag is created, the release process through CircleCI will be triggered for this tag and Circle CI will draft the GitHub release using the `prombot` account.
+Once a tag is created, the release process through GitHub Actions will be triggered for this tag, and GitHub Actions will draft the GitHub release using the `prombot` account.
Finally, wait for the build step for the tag to finish. The point here is to wait for tarballs to be uploaded to the GitHub release and the container images to be pushed to Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification.
**Note:** for a release candidate version, ensure the _This is a pre-release_ box is checked when drafting the release in the GitHub UI. The CI job should take care of this, but it's a good idea to double-check before clicking _Publish release_.
-### 3. Wrapping up
+### 3. Tag the library release
+
+Go modules versioning requires strict use of semver. Because we do not commit to
+avoid code-level breaking changes for the libraries between minor releases of
+the Prometheus server, we use major version zero releases for the libraries.
+
+Tagging the new library release works similarly to the normal release tagging,
+but without the subsequent build and publish steps. Use the following commands:
+
+```bash
+tag="v$(./scripts/get_module_version.sh)"
+git tag -s "${tag}" -m "${tag}"
+git push origin "${tag}"
+```
+
+### 4. Wrapping up
For release candidate versions (`v2.16.0-rc.0`), run the benchmark for 3 days using the `/prombench vX.Y.Z` command, `vX.Y.Z` being the latest stable patch release's tag of the previous minor release series, such as `v2.15.2`.
diff --git a/SECURITY-INSIGHTS.yml b/SECURITY-INSIGHTS.yml
new file mode 100644
index 0000000000..009b356214
--- /dev/null
+++ b/SECURITY-INSIGHTS.yml
@@ -0,0 +1,48 @@
+header:
+ schema-version: '1.0.0'
+ expiration-date: '2025-07-30T01:00:00.000Z'
+ last-updated: '2024-07-30'
+ last-reviewed: '2024-07-30'
+ project-url: https://github.com/prometheus/prometheus
+ changelog: https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md
+ license: https://github.com/prometheus/prometheus/blob/main/LICENSE
+project-lifecycle:
+ status: active
+ bug-fixes-only: false
+ core-maintainers:
+ - https://github.com/prometheus/prometheus/blob/main/MAINTAINERS.md
+contribution-policy:
+ accepts-pull-requests: true
+ accepts-automated-pull-requests: true
+dependencies:
+ third-party-packages: true
+ dependencies-lists:
+ - https://github.com/prometheus/prometheus/blob/main/go.mod
+ - https://github.com/prometheus/prometheus/blob/main/web/ui/package.json
+ env-dependencies-policy:
+ policy-url: https://github.com/prometheus/prometheus/blob/main/CONTRIBUTING.md#dependency-management
+distribution-points:
+ - https://github.com/prometheus/prometheus/releases
+documentation:
+ - https://prometheus.io/docs/introduction/overview/
+security-contacts:
+ - type: email
+ value: prometheus-team@googlegroups.com
+security-testing:
+ - tool-type: sca
+ tool-name: Dependabot
+ tool-version: latest
+ integration:
+ ad-hoc: false
+ ci: true
+ before-release: true
+ - tool-type: sast
+ tool-name: CodeQL
+ tool-version: latest
+ integration:
+ ad-hoc: false
+ ci: true
+ before-release: true
+vulnerability-reporting:
+ accepts-vulnerability-reports: true
+ security-policy: https://github.com/prometheus/prometheus/security/policy
diff --git a/VERSION b/VERSION
index f419e2c6f1..4d9d11cf50 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.53.1
+3.4.2
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 1d844ddba6..ed7aa52c8a 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -18,17 +18,19 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"math"
"math/bits"
"net"
"net/http"
- _ "net/http/pprof" // Comment this line to disable pprof endpoint.
"net/url"
"os"
"os/signal"
"path/filepath"
+ goregexp "regexp" //nolint:depguard // The Prometheus client library requires us to pass a regexp from this package.
"runtime"
"runtime/debug"
+ "slices"
"strconv"
"strings"
"sync"
@@ -38,8 +40,6 @@ import (
"github.com/KimMachineGun/automemlimit/memlimit"
"github.com/alecthomas/kingpin/v2"
"github.com/alecthomas/units"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/mwitkow/go-conntrack"
"github.com/oklog/run"
@@ -47,8 +47,8 @@ import (
"github.com/prometheus/client_golang/prometheus/collectors"
versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version"
"github.com/prometheus/common/model"
- "github.com/prometheus/common/promlog"
- promlogflag "github.com/prometheus/common/promlog/flag"
+ "github.com/prometheus/common/promslog"
+ promslogflag "github.com/prometheus/common/promslog/flag"
"github.com/prometheus/common/version"
toolkit_web "github.com/prometheus/exporter-toolkit/web"
"go.uber.org/atomic"
@@ -58,8 +58,6 @@ import (
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
- "github.com/prometheus/prometheus/discovery/legacymanager"
- "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@@ -76,13 +74,53 @@ import (
"github.com/prometheus/prometheus/tracing"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/agent"
- "github.com/prometheus/prometheus/tsdb/wlog"
+ "github.com/prometheus/prometheus/util/compression"
"github.com/prometheus/prometheus/util/documentcli"
"github.com/prometheus/prometheus/util/logging"
+ "github.com/prometheus/prometheus/util/notifications"
prom_runtime "github.com/prometheus/prometheus/util/runtime"
"github.com/prometheus/prometheus/web"
)
+// klogv1OutputCallDepth is the stack depth where we can find the origin of this call.
+const klogv1OutputCallDepth = 6
+
+// klogv1DefaultPrefixLength is the length of the log prefix that we have to strip out.
+const klogv1DefaultPrefixLength = 53
+
+// klogv1Writer is used in SetOutputBySeverity call below to redirect any calls
+// to klogv1 to end up in klogv2.
+// This is a hack to support klogv1 without use of go-kit/log. It is inspired
+// by klog's upstream klogv1/v2 coexistence example:
+// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go
+type klogv1Writer struct{}
+
+// Write redirects klogv1 calls to klogv2.
+// This is a hack to support klogv1 without use of go-kit/log. It is inspired
+// by klog's upstream klogv1/v2 coexistence example:
+// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go
+func (kw klogv1Writer) Write(p []byte) (n int, err error) {
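+	// Messages shorter than the klogv1 prefix have nothing to strip, so forward them verbatim at info level.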
+ if len(p) < klogv1DefaultPrefixLength {
+ klogv2.InfoDepth(klogv1OutputCallDepth, string(p))
+ return len(p), nil
+ }
+
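+	// klogv1 lines begin with a severity character (I, W, E or F); dispatch to the
+	// matching klogv2 severity and strip the fixed-length klogv1 prefix.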
+ switch p[0] {
+ case 'I':
+ klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
+ case 'W':
+ klogv2.WarningDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
+ case 'E':
+ klogv2.ErrorDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
+ case 'F':
+ klogv2.FatalDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
+ default:
+ klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
+ }
+
+ return len(p), nil
+}
+
var (
appName = "prometheus"
@@ -103,6 +141,10 @@ var (
)
func init() {
+ // This can be removed when the legacy global mode is fully deprecated.
+ //nolint:staticcheck
+ model.NameValidationScheme = model.UTF8Validation
+
prometheus.MustRegister(versioncollector.NewCollector(strings.ReplaceAll(appName, "-", "_")))
var err error
@@ -115,7 +157,7 @@ func init() {
// serverOnlyFlag creates server-only kingpin flag.
func serverOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagClause {
return app.Flag(name, fmt.Sprintf("%s Use with server mode only.", help)).
- PreAction(func(parseContext *kingpin.ParseContext) error {
+ PreAction(func(_ *kingpin.ParseContext) error {
// This will be invoked only if flag is actually provided by user.
serverOnlyFlags = append(serverOnlyFlags, "--"+name)
return nil
@@ -125,7 +167,7 @@ func serverOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCl
// agentOnlyFlag creates agent-only kingpin flag.
func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagClause {
return app.Flag(name, fmt.Sprintf("%s Use with agent mode only.", help)).
- PreAction(func(parseContext *kingpin.ParseContext) error {
+ PreAction(func(_ *kingpin.ParseContext) error {
// This will be invoked only if flag is actually provided by user.
agentOnlyFlags = append(agentOnlyFlags, "--"+name)
return nil
@@ -135,129 +177,152 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla
type flagConfig struct {
configFile string
- agentStoragePath string
- serverStoragePath string
- notifier notifier.Options
- forGracePeriod model.Duration
- outageTolerance model.Duration
- resendDelay model.Duration
- maxConcurrentEvals int64
- web web.Options
- scrape scrape.Options
- tsdb tsdbOptions
- agent agentOptions
- lookbackDelta model.Duration
- webTimeout model.Duration
- queryTimeout model.Duration
- queryConcurrency int
- queryMaxSamples int
- RemoteFlushDeadline model.Duration
+ agentStoragePath string
+ serverStoragePath string
+ notifier notifier.Options
+ forGracePeriod model.Duration
+ outageTolerance model.Duration
+ resendDelay model.Duration
+ maxConcurrentEvals int64
+ web web.Options
+ scrape scrape.Options
+ tsdb tsdbOptions
+ agent agentOptions
+ lookbackDelta model.Duration
+ webTimeout model.Duration
+ queryTimeout model.Duration
+ queryConcurrency int
+ queryMaxSamples int
+ RemoteFlushDeadline model.Duration
+ maxNotificationsSubscribers int
- featureList []string
- memlimitRatio float64
+ enableAutoReload bool
+ autoReloadInterval model.Duration
+
+ maxprocsEnable bool
+ memlimitEnable bool
+ memlimitRatio float64
+
+ featureList []string
// These options are extracted from featureList
// for ease of use.
- enableExpandExternalLabels bool
- enableNewSDManager bool
- enablePerStepStats bool
- enableAutoGOMAXPROCS bool
- enableAutoGOMEMLIMIT bool
- enableConcurrentRuleEval bool
+ enablePerStepStats bool
+ enableConcurrentRuleEval bool
prometheusURL string
corsRegexString string
- promlogConfig promlog.Config
+ promqlEnableDelayedNameRemoval bool
+
+ promslogConfig promslog.Config
}
// setFeatureListOptions sets the corresponding options from the featureList.
-func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
+func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
for _, f := range c.featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
- case "remote-write-receiver":
- c.web.EnableRemoteWriteReceiver = true
- level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. Use --web.enable-remote-write-receiver.")
- case "otlp-write-receiver":
- c.web.EnableOTLPWriteReceiver = true
- level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
- case "expand-external-labels":
- c.enableExpandExternalLabels = true
- level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
case "exemplar-storage":
c.tsdb.EnableExemplarStorage = true
- level.Info(logger).Log("msg", "Experimental in-memory exemplar storage enabled")
+ logger.Info("Experimental in-memory exemplar storage enabled")
case "memory-snapshot-on-shutdown":
c.tsdb.EnableMemorySnapshotOnShutdown = true
- level.Info(logger).Log("msg", "Experimental memory snapshot on shutdown enabled")
+ logger.Info("Experimental memory snapshot on shutdown enabled")
case "extra-scrape-metrics":
c.scrape.ExtraMetrics = true
- level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled")
+ logger.Info("Experimental additional scrape metrics enabled")
case "metadata-wal-records":
c.scrape.AppendMetadata = true
- level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0")
- case "new-service-discovery-manager":
- c.enableNewSDManager = true
- level.Info(logger).Log("msg", "Experimental service discovery manager")
- case "agent":
- agentMode = true
- level.Info(logger).Log("msg", "Experimental agent mode enabled.")
+ logger.Info("Experimental metadata records in WAL enabled")
case "promql-per-step-stats":
c.enablePerStepStats = true
- level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
- case "auto-gomaxprocs":
- c.enableAutoGOMAXPROCS = true
- level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
- case "auto-gomemlimit":
- c.enableAutoGOMEMLIMIT = true
- level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
+ logger.Info("Experimental per-step statistics reporting")
+ case "auto-reload-config":
+ c.enableAutoReload = true
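+			// Treat any configured auto-reload interval below one second as 1s.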
+ if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
+ c.autoReloadInterval, _ = model.ParseDuration("1s")
+ }
+ logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes every", "interval", c.autoReloadInterval)
case "concurrent-rule-eval":
c.enableConcurrentRuleEval = true
- level.Info(logger).Log("msg", "Experimental concurrent rule evaluation enabled.")
- case "no-default-scrape-port":
- c.scrape.NoDefaultPort = true
- level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
+ logger.Info("Experimental concurrent rule evaluation enabled.")
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
- level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
+ logger.Info("Experimental PromQL functions enabled.")
+ case "promql-duration-expr":
+ parser.ExperimentalDurationExpr = true
+ logger.Info("Experimental duration expression parsing enabled.")
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
c.scrape.EnableNativeHistogramsIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
- level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+ logger.Info("Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+ case "ooo-native-histograms":
+ logger.Warn("This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
+ c.web.CTZeroIngestionEnabled = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
- level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+ logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+ case "delayed-compaction":
+ c.tsdb.EnableDelayedCompaction = true
+ logger.Info("Experimental delayed compaction is enabled.")
+ case "promql-delayed-name-removal":
+ c.promqlEnableDelayedNameRemoval = true
+ logger.Info("Experimental PromQL delayed name removal enabled.")
case "":
continue
- case "promql-at-modifier", "promql-negative-offset":
- level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
+ case "old-ui":
+ c.web.UseOldUI = true
+ logger.Info("Serving previous version of the Prometheus web UI.")
+ case "otlp-deltatocumulative":
+ c.web.ConvertOTLPDelta = true
+ logger.Info("Converting delta OTLP metrics to cumulative")
+ case "otlp-native-delta-ingestion":
+ // Experimental OTLP native delta ingestion.
+ // This currently just stores the raw delta value as-is with unknown metric type. Better typing and
+ // type-aware functions may come later.
+ // See proposal: https://github.com/prometheus/proposals/pull/48
+ c.web.NativeOTLPDeltaIngestion = true
+ logger.Info("Enabling native ingestion of delta OTLP metrics, storing the raw sample values without conversion. WARNING: Delta support is in an early stage of development. The ingestion and querying process is likely to change over time.")
+ case "type-and-unit-labels":
+ c.scrape.EnableTypeAndUnitLabels = true
+ logger.Info("Experimental type and unit labels enabled")
+ case "use-uncached-io":
+ c.tsdb.UseUncachedIO = true
+ logger.Info("Experimental Uncached IO is enabled.")
default:
- level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o)
+ logger.Warn("Unknown option for --enable-feature", "option", o)
}
}
}
+ if c.web.ConvertOTLPDelta && c.web.NativeOTLPDeltaIngestion {
+ return errors.New("cannot enable otlp-deltatocumulative and otlp-native-delta-ingestion features at the same time")
+ }
+
return nil
}
+// parseCompressionType parses the two compression-related configuration values and returns the CompressionType.
+func parseCompressionType(compress bool, compressType compression.Type) compression.Type {
+ if compress {
+ return compressType
+ }
+ return compression.None
+}
+
func main() {
if os.Getenv("DEBUG") != "" {
runtime.SetBlockProfileRate(20)
runtime.SetMutexProfileFraction(20)
}
- var (
- oldFlagRetentionDuration model.Duration
- newFlagRetentionDuration model.Duration
- )
-
// Unregister the default GoCollector, and reregister with our defaults.
if prometheus.Unregister(collectors.NewGoCollector()) {
prometheus.MustRegister(
@@ -265,6 +330,7 @@ func main() {
collectors.WithGoCollectorRuntimeMetrics(
collectors.MetricsGC,
collectors.MetricsScheduler,
+ collectors.GoRuntimeMetricsRule{Matcher: goregexp.MustCompile(`^/sync/mutex/wait/total:seconds$`)},
),
),
)
@@ -278,7 +344,7 @@ func main() {
Registerer: prometheus.DefaultRegisterer,
Gatherer: prometheus.DefaultGatherer,
},
- promlogConfig: promlog.Config{},
+ promslogConfig: promslog.Config{},
}
a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout)
@@ -290,9 +356,16 @@ func main() {
a.Flag("config.file", "Prometheus configuration file path.").
Default("prometheus.yml").StringVar(&cfg.configFile)
- a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
- Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
+ a.Flag("config.auto-reload-interval", "Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes.").
+ Default("30s").SetValue(&cfg.autoReloadInterval)
+ a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated.").
+ Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses)
+
+ a.Flag("auto-gomaxprocs", "Automatically set GOMAXPROCS to match Linux container CPU quota").
+ Default("true").BoolVar(&cfg.maxprocsEnable)
+ a.Flag("auto-gomemlimit", "Automatically set GOMEMLIMIT to match Linux container or system memory limit").
+ Default("true").BoolVar(&cfg.memlimitEnable)
a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory").
Default("0.9").FloatVar(&cfg.memlimitRatio)
@@ -305,9 +378,12 @@ func main() {
"Maximum duration before timing out read of the request, and closing idle connections.").
Default("5m").SetValue(&cfg.webTimeout)
- a.Flag("web.max-connections", "Maximum number of simultaneous connections.").
+ a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners.").
Default("512").IntVar(&cfg.web.MaxConnections)
+ a.Flag("web.max-notifications-subscribers", "Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close.").
+ Default("16").IntVar(&cfg.maxNotificationsSubscribers)
+
a.Flag("web.external-url",
"The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically.").
PlaceHolder("").StringVar(&cfg.prometheusURL)
@@ -334,6 +410,9 @@ func main() {
a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())).
Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs))
+ a.Flag("web.enable-otlp-receiver", "Enable API endpoint accepting OTLP write requests.").
+ Default("false").BoolVar(&cfg.web.EnableOTLPWriteReceiver)
+
a.Flag("web.console.templates", "Path to the console template directory, available at /consoles.").
Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath)
@@ -364,11 +443,8 @@ func main() {
"Size at which to split the tsdb WAL segment files. Example: 100MB").
Hidden().PlaceHolder("").BytesVar(&cfg.tsdb.WALSegmentSize)
- serverOnlyFlag(a, "storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead.").
- SetValue(&oldFlagRetentionDuration)
-
- serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
- SetValue(&newFlagRetentionDuration)
+ serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
+ SetValue(&cfg.tsdb.RetentionDuration)
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
BytesVar(&cfg.tsdb.MaxBytes)
@@ -376,16 +452,18 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
- // TODO: Remove in Prometheus 3.0.
- var b bool
- serverOnlyFlag(a, "storage.tsdb.allow-overlapping-blocks", "[DEPRECATED] This flag has no effect. Overlapping blocks are enabled by default now.").
- Default("true").Hidden().BoolVar(&b)
+ serverOnlyFlag(a, "storage.tsdb.allow-overlapping-compaction", "Allow compaction of overlapping blocks. If set to false, TSDB stops vertical compaction and leaves overlapping blocks there. The use case is to let another component handle the compaction of overlapping blocks.").
+ Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction)
- serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
- Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
+ var (
+ tsdbWALCompression bool
+ tsdbWALCompressionType string
+ )
+ serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL. If false, the --storage.tsdb.wal-compression-type flag is ignored.").
+ Hidden().Default("true").BoolVar(&tsdbWALCompression)
- serverOnlyFlag(a, "storage.tsdb.wal-compression-type", "Compression algorithm for the tsdb WAL.").
- Hidden().Default(string(wlog.CompressionSnappy)).EnumVar(&cfg.tsdb.WALCompressionType, string(wlog.CompressionSnappy), string(wlog.CompressionZstd))
+ serverOnlyFlag(a, "storage.tsdb.wal-compression-type", "Compression algorithm for the tsdb WAL, used when --storage.tsdb.wal-compression is true.").
+ Hidden().Default(compression.Snappy).EnumVar(&tsdbWALCompressionType, compression.Snappy, compression.Zstd)
serverOnlyFlag(a, "storage.tsdb.head-chunks-write-queue-size", "Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental.").
Default("0").IntVar(&cfg.tsdb.HeadChunksWriteQueueSize)
@@ -393,6 +471,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk.").
Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk)
+ serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled.").
+ Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent)
+
agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
Default("data-agent/").StringVar(&cfg.agentStoragePath)
@@ -400,11 +481,15 @@ func main() {
"Size at which to split WAL segment files. Example: 100MB").
Hidden().PlaceHolder("").BytesVar(&cfg.agent.WALSegmentSize)
- agentOnlyFlag(a, "storage.agent.wal-compression", "Compress the agent WAL.").
- Default("true").BoolVar(&cfg.agent.WALCompression)
+ var (
+ agentWALCompression bool
+ agentWALCompressionType string
+ )
+ agentOnlyFlag(a, "storage.agent.wal-compression", "Compress the agent WAL. If false, the --storage.agent.wal-compression-type flag is ignored.").
+ Default("true").BoolVar(&agentWALCompression)
- agentOnlyFlag(a, "storage.agent.wal-compression-type", "Compression algorithm for the agent WAL.").
- Hidden().Default(string(wlog.CompressionSnappy)).EnumVar(&cfg.agent.WALCompressionType, string(wlog.CompressionSnappy), string(wlog.CompressionZstd))
+ agentOnlyFlag(a, "storage.agent.wal-compression-type", "Compression algorithm for the agent WAL, used when --storage.agent.wal-compression is true.").
+ Hidden().Default(compression.Snappy).EnumVar(&agentWALCompressionType, compression.Snappy, compression.Zstd)
agentOnlyFlag(a, "storage.agent.wal-truncate-frequency",
"The frequency at which to truncate the WAL and remove old data.").
@@ -454,12 +539,12 @@ func main() {
serverOnlyFlag(a, "alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications.").
Default("10000").IntVar(&cfg.notifier.QueueCapacity)
+ serverOnlyFlag(a, "alertmanager.notification-batch-size", "The maximum number of notifications per batch to send to the Alertmanager.").
+ Default(strconv.Itoa(notifier.DefaultMaxBatchSize)).IntVar(&cfg.notifier.MaxBatchSize)
+
serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down.").
Default("true").BoolVar(&cfg.notifier.DrainOnShutdown)
- // TODO: Remove in Prometheus 3.0.
- alertmanagerTimeout := a.Flag("alertmanager.timeout", "[DEPRECATED] This flag has no effect.").Hidden().String()
-
serverOnlyFlag(a, "query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations and federation.").
Default("5m").SetValue(&cfg.lookbackDelta)
@@ -475,12 +560,14 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
- a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
+ a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
- promlogflag.AddFlags(a, &cfg.promlogConfig)
+ a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
- a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error {
+ promslogflag.AddFlags(a, &cfg.promslogConfig)
+
+ a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(_ *kingpin.ParseContext) error {
if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil {
os.Exit(1)
return err
@@ -491,15 +578,21 @@ func main() {
_, err := a.Parse(os.Args[1:])
if err != nil {
- fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing command line arguments: %w", err))
+ fmt.Fprintf(os.Stderr, "Error parsing command line arguments: %s\n", err)
a.Usage(os.Args[1:])
os.Exit(2)
}
- logger := promlog.New(&cfg.promlogConfig)
+ logger := promslog.New(&cfg.promslogConfig)
+ slog.SetDefault(logger)
+
+ notifs := notifications.NewNotifications(cfg.maxNotificationsSubscribers, prometheus.DefaultRegisterer)
+ cfg.web.NotificationsSub = notifs.Sub
+ cfg.web.NotificationsGetter = notifs.Get
+ notifs.AddNotification(notifications.StartingUp)
if err := cfg.setFeatureListOptions(logger); err != nil {
- fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing feature list: %w", err))
+ fmt.Fprintf(os.Stderr, "Error parsing feature list: %s\n", err)
os.Exit(1)
}
@@ -523,7 +616,7 @@ func main() {
localStoragePath = cfg.agentStoragePath
}
- cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress)
+ cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddresses[0])
if err != nil {
fmt.Fprintln(os.Stderr, fmt.Errorf("parse external URL %q: %w", cfg.prometheusURL, err))
os.Exit(2)
@@ -535,28 +628,37 @@ func main() {
os.Exit(2)
}
- if *alertmanagerTimeout != "" {
- level.Warn(logger).Log("msg", "The flag --alertmanager.timeout has no effect and will be removed in the future.")
- }
-
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
- if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil {
+ if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil {
absPath, pathErr := filepath.Abs(cfg.configFile)
if pathErr != nil {
absPath = cfg.configFile
}
- level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
+ logger.Error(fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
os.Exit(2)
}
+ // Get scrape configs to validate dynamically loaded scrape_config_files.
+ // They can change over time, but we do the extra validation on startup for a better user experience.
if _, err := cfgFile.GetScrapeConfigs(); err != nil {
absPath, pathErr := filepath.Abs(cfg.configFile)
if pathErr != nil {
absPath = cfg.configFile
}
- level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
+ logger.Error(fmt.Sprintf("Error loading dynamic scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
os.Exit(2)
}
+
+ // Parse rule files to verify they exist and contain valid rules.
+ if err := rules.ParseFiles(cfgFile.RuleFiles); err != nil {
+ absPath, pathErr := filepath.Abs(cfg.configFile)
+ if pathErr != nil {
+ absPath = cfg.configFile
+ }
+ logger.Error(fmt.Sprintf("Error loading rule file patterns from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
+ os.Exit(2)
+ }
+
if cfg.tsdb.EnableExemplarStorage {
if cfgFile.StorageConfig.ExemplarsConfig == nil {
cfgFile.StorageConfig.ExemplarsConfig = &config.DefaultExemplarsConfig
@@ -567,6 +669,32 @@ func main() {
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
}
+ // Set Go runtime parameters before we get too far into initialization.
+ updateGoGC(cfgFile, logger)
+ if cfg.maxprocsEnable {
+ l := func(format string, a ...interface{}) {
+ logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...), "component", "automaxprocs")
+ }
+ if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil {
+ logger.Warn("Failed to set GOMAXPROCS automatically", "component", "automaxprocs", "err", err)
+ }
+ }
+
+ if cfg.memlimitEnable {
+ if _, err := memlimit.SetGoMemLimitWithOpts(
+ memlimit.WithRatio(cfg.memlimitRatio),
+ memlimit.WithProvider(
+ memlimit.ApplyFallback(
+ memlimit.FromCgroup,
+ memlimit.FromSystem,
+ ),
+ ),
+ memlimit.WithLogger(logger.With("component", "automemlimit")),
+ ); err != nil {
+ logger.Warn("automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err)
+ }
+ }
+
// Now that the validity of the config is established, set the config
// success metrics accordingly, although the config isn't really loaded
// yet. This will happen later (including setting these metrics again),
@@ -585,20 +713,9 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
- // Time retention settings.
- if oldFlagRetentionDuration != 0 {
- level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.")
- cfg.tsdb.RetentionDuration = oldFlagRetentionDuration
- }
-
- // When the new flag is set it takes precedence.
- if newFlagRetentionDuration != 0 {
- cfg.tsdb.RetentionDuration = newFlagRetentionDuration
- }
-
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
- level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
+ logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
@@ -608,7 +725,7 @@ func main() {
panic(err)
}
cfg.tsdb.RetentionDuration = y
- level.Warn(logger).Log("msg", "Time retention value is too high. Limiting to: "+y.String())
+ logger.Warn("Time retention value is too high. Limiting to: " + y.String())
}
// Max block size settings.
@@ -624,16 +741,23 @@ func main() {
cfg.tsdb.MaxBlockDuration = maxBlockDuration
}
+
+ // Delayed compaction checks
+ if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) {
+ logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. Using default", "default", tsdb.DefaultCompactionDelayMaxPercent)
+ cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent
+ }
+
+ cfg.tsdb.WALCompressionType = parseCompressionType(tsdbWALCompression, tsdbWALCompressionType)
+ } else {
+ cfg.agent.WALCompressionType = parseCompressionType(agentWALCompression, agentWALCompressionType)
}
noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{}
noStepSubqueryInterval.Set(config.DefaultGlobalConfig.EvaluationInterval)
- // Above level 6, the k8s client would log bearer tokens in clear-text.
- klog.ClampLevel(6)
- klog.SetLogger(log.With(logger, "component", "k8s_client_runtime"))
- klogv2.ClampLevel(6)
- klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime"))
+ klogv2.SetSlogLogger(logger.With("component", "k8s_client_runtime"))
+ klog.SetOutputBySeverity("INFO", klogv1Writer{})
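// For context: klog.SetOutputBySeverity expects an io.Writer, so klogv1Writer
// (defined elsewhere in this file) presumably adapts legacy klog v1 output onto
// klog v2, which is itself backed by the slog logger configured above. A rough
// sketch of that idea, not the actual implementation:
type klogv1Writer struct{}

func (klogv1Writer) Write(p []byte) (n int, err error) {
	// Severity prefix parsing is omitted here; forward everything at info level.
	klogv2.Info(string(p))
	return len(p), nil
}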
modeAppName := "Prometheus Server"
mode := "server"
@@ -642,20 +766,22 @@ func main() {
mode = "agent"
}
- level.Info(logger).Log("msg", "Starting "+modeAppName, "mode", mode, "version", version.Info())
+ logger.Info("Starting "+modeAppName, "mode", mode, "version", version.Info())
if bits.UintSize < 64 {
- level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH)
+ logger.Warn("This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH)
}
- level.Info(logger).Log("build_context", version.BuildContext())
- level.Info(logger).Log("host_details", prom_runtime.Uname())
- level.Info(logger).Log("fd_limits", prom_runtime.FdLimits())
- level.Info(logger).Log("vm_limits", prom_runtime.VMLimits())
+ logger.Info("operational information",
+ "build_context", version.BuildContext(),
+ "host_details", prom_runtime.Uname(),
+ "fd_limits", prom_runtime.FdLimits(),
+ "vm_limits", prom_runtime.VMLimits(),
+ )
var (
localStorage = &readyStorage{stats: tsdb.NewDBStats()}
scraper = &readyScrapeManager{}
- remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata)
+ remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper)
fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
)
@@ -663,12 +789,12 @@ func main() {
ctxWeb, cancelWeb = context.WithCancel(context.Background())
ctxRule = context.Background()
- notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))
+ notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier"))
ctxScrape, cancelScrape = context.WithCancel(context.Background())
ctxNotify, cancelNotify = context.WithCancel(context.Background())
- discoveryManagerScrape discoveryManager
- discoveryManagerNotify discoveryManager
+ discoveryManagerScrape *discovery.Manager
+ discoveryManagerNotify *discovery.Manager
)
// Kubernetes client metrics are used by Kubernetes SD.
@@ -678,62 +804,37 @@ func main() {
// they are not specific to an SD instance.
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
if err != nil {
- level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
+ logger.Error("failed to register Kubernetes client metrics", "err", err)
os.Exit(1)
}
sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer)
if err != nil {
- level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err)
+ logger.Error("failed to register service discovery metrics", "err", err)
os.Exit(1)
}
- if cfg.enableNewSDManager {
- {
- discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
- if discMgr == nil {
- level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
- os.Exit(1)
- }
- discoveryManagerScrape = discMgr
- }
+ discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
+ if discoveryManagerScrape == nil {
+ logger.Error("failed to create a discovery manager scrape")
+ os.Exit(1)
+ }
- {
- discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
- if discMgr == nil {
- level.Error(logger).Log("msg", "failed to create a discovery manager notify")
- os.Exit(1)
- }
- discoveryManagerNotify = discMgr
- }
- } else {
- {
- discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("scrape"))
- if discMgr == nil {
- level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
- os.Exit(1)
- }
- discoveryManagerScrape = discMgr
- }
-
- {
- discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("notify"))
- if discMgr == nil {
- level.Error(logger).Log("msg", "failed to create a discovery manager notify")
- os.Exit(1)
- }
- discoveryManagerNotify = discMgr
- }
+ discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
+ if discoveryManagerNotify == nil {
+ logger.Error("failed to create a discovery manager notify")
+ os.Exit(1)
}
scrapeManager, err := scrape.NewManager(
&cfg.scrape,
- log.With(logger, "component", "scrape manager"),
+ logger.With("component", "scrape manager"),
+ logging.NewJSONFileLogger,
fanoutStorage,
prometheus.DefaultRegisterer,
)
if err != nil {
- level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
+ logger.Error("failed to create a scrape manager", "err", err)
os.Exit(1)
}
@@ -744,43 +845,22 @@ func main() {
ruleManager *rules.Manager
)
- if cfg.enableAutoGOMAXPROCS {
- l := func(format string, a ...interface{}) {
- level.Info(logger).Log("component", "automaxprocs", "msg", fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...))
- }
- if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil {
- level.Warn(logger).Log("component", "automaxprocs", "msg", "Failed to set GOMAXPROCS automatically", "err", err)
- }
- }
-
- if cfg.enableAutoGOMEMLIMIT {
- if _, err := memlimit.SetGoMemLimitWithOpts(
- memlimit.WithRatio(cfg.memlimitRatio),
- memlimit.WithProvider(
- memlimit.ApplyFallback(
- memlimit.FromCgroup,
- memlimit.FromSystem,
- ),
- ),
- ); err != nil {
- level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err)
- }
- }
-
if !agentMode {
opts := promql.EngineOpts{
- Logger: log.With(logger, "component", "query engine"),
+ Logger: logger.With("component", "query engine"),
Reg: prometheus.DefaultRegisterer,
MaxSamples: cfg.queryMaxSamples,
Timeout: time.Duration(cfg.queryTimeout),
- ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")),
+ ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, logger.With("component", "activeQueryTracker")),
LookbackDelta: time.Duration(cfg.lookbackDelta),
NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
// EnableAtModifier and EnableNegativeOffset have to be
// always on for regular PromQL as of Prometheus v2.33.
- EnableAtModifier: true,
- EnableNegativeOffset: true,
- EnablePerStepStats: cfg.enablePerStepStats,
+ EnableAtModifier: true,
+ EnableNegativeOffset: true,
+ EnablePerStepStats: cfg.enablePerStepStats,
+ EnableDelayedNameRemoval: cfg.promqlEnableDelayedNameRemoval,
+ EnableTypeAndUnitLabels: cfg.scrape.EnableTypeAndUnitLabels,
}
queryEngine = promql.NewEngine(opts)
@@ -793,7 +873,7 @@ func main() {
Context: ctxRule,
ExternalURL: cfg.web.ExternalURL,
Registerer: prometheus.DefaultRegisterer,
- Logger: log.With(logger, "component", "rule manager"),
+ Logger: logger.With("component", "rule manager"),
OutageTolerance: time.Duration(cfg.outageTolerance),
ForGracePeriod: time.Duration(cfg.forGracePeriod),
ResendDelay: time.Duration(cfg.resendDelay),
@@ -844,7 +924,7 @@ func main() {
}
// Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager.
- webHandler := web.New(log.With(logger, "component", "web"), &cfg.web)
+ webHandler := web.New(logger.With("component", "web"), &cfg.web)
// Monitor outgoing connections on default transport with conntrack.
http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc(
@@ -969,15 +1049,15 @@ func main() {
})
}
- listener, err := webHandler.Listener()
+ listeners, err := webHandler.Listeners()
if err != nil {
- level.Error(logger).Log("msg", "Unable to start web listener", "err", err)
+ logger.Error("Unable to start web listener", "err", err)
os.Exit(1)
}
err = toolkit_web.Validate(*webConfig)
if err != nil {
- level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err)
+ logger.Error("Unable to validate web configuration file", "err", err)
os.Exit(1)
}
@@ -992,18 +1072,19 @@ func main() {
// Don't forget to release the reloadReady channel so that waiting blocks can exit normally.
select {
case sig := <-term:
- level.Warn(logger).Log("msg", "Received an OS signal, exiting gracefully...", "signal", sig.String())
+ logger.Warn("Received an OS signal, exiting gracefully...", "signal", sig.String())
reloadReady.Close()
case <-webHandler.Quit():
- level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...")
+ logger.Warn("Received termination request via web service, exiting gracefully...")
case <-cancel:
reloadReady.Close()
}
return nil
},
- func(err error) {
+ func(_ error) {
close(cancel)
- webHandler.SetReady(false)
+ webHandler.SetReady(web.Stopping)
+ notifs.AddNotification(notifications.ShuttingDown)
},
)
}
@@ -1012,11 +1093,11 @@ func main() {
g.Add(
func() error {
err := discoveryManagerScrape.Run()
- level.Info(logger).Log("msg", "Scrape discovery manager stopped")
+ logger.Info("Scrape discovery manager stopped")
return err
},
- func(err error) {
- level.Info(logger).Log("msg", "Stopping scrape discovery manager...")
+ func(_ error) {
+ logger.Info("Stopping scrape discovery manager...")
cancelScrape()
},
)
@@ -1026,11 +1107,11 @@ func main() {
g.Add(
func() error {
err := discoveryManagerNotify.Run()
- level.Info(logger).Log("msg", "Notify discovery manager stopped")
+ logger.Info("Notify discovery manager stopped")
return err
},
- func(err error) {
- level.Info(logger).Log("msg", "Stopping notify discovery manager...")
+ func(_ error) {
+ logger.Info("Stopping notify discovery manager...")
cancelNotify()
},
)
@@ -1043,7 +1124,7 @@ func main() {
ruleManager.Run()
return nil
},
- func(err error) {
+ func(_ error) {
ruleManager.Stop()
},
)
@@ -1059,15 +1140,15 @@ func main() {
<-reloadReady.C
err := scrapeManager.Run(discoveryManagerScrape.SyncCh())
- level.Info(logger).Log("msg", "Scrape manager stopped")
+ logger.Info("Scrape manager stopped")
return err
},
- func(err error) {
+ func(_ error) {
// Scrape manager needs to be stopped before closing the local TSDB
// so that it doesn't try to write samples to a closed storage.
// We should also wait for rule manager to be fully stopped to ensure
// we don't trigger any false positive alerts for rules using absent().
- level.Info(logger).Log("msg", "Stopping scrape manager...")
+ logger.Info("Stopping scrape manager...")
scrapeManager.Stop()
},
)
@@ -1080,7 +1161,7 @@ func main() {
tracingManager.Run()
return nil
},
- func(err error) {
+ func(_ error) {
tracingManager.Stop()
},
)
@@ -1093,6 +1174,23 @@ func main() {
hup := make(chan os.Signal, 1)
signal.Notify(hup, syscall.SIGHUP)
cancel := make(chan struct{})
+
+ var checksum string
+ if cfg.enableAutoReload {
+ checksum, err = config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ logger.Error("Failed to generate initial checksum for configuration file", "err", err)
+ }
+ }
+
+ callback := func(success bool) {
+ if success {
+ notifs.DeleteNotification(notifications.ConfigurationUnsuccessful)
+ return
+ }
+ notifs.AddNotification(notifications.ConfigurationUnsuccessful)
+ }
+
g.Add(
func() error {
<-reloadReady.C
@@ -1100,22 +1198,51 @@ func main() {
for {
select {
case <-hup:
- if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
- level.Error(logger).Log("msg", "Error reloading config", "err", err)
+ if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
+ logger.Error("Error reloading config", "err", err)
+ } else if cfg.enableAutoReload {
+ checksum, err = config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ logger.Error("Failed to generate checksum during configuration reload", "err", err)
+ }
}
case rc := <-webHandler.Reload():
- if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
- level.Error(logger).Log("msg", "Error reloading config", "err", err)
+ if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
+ logger.Error("Error reloading config", "err", err)
rc <- err
} else {
rc <- nil
+ if cfg.enableAutoReload {
+ checksum, err = config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ logger.Error("Failed to generate checksum during configuration reload", "err", err)
+ }
+ }
+ }
+ case <-time.Tick(time.Duration(cfg.autoReloadInterval)):
+ if !cfg.enableAutoReload {
+ continue
+ }
+ currentChecksum, err := config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ checksum = currentChecksum
+ logger.Error("Failed to generate checksum during configuration reload", "err", err)
+ } else if currentChecksum == checksum {
+ continue
+ }
+ logger.Info("Configuration file change detected, reloading the configuration.")
+
+ if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
+ logger.Error("Error reloading config", "err", err)
+ } else {
+ checksum = currentChecksum
}
case <-cancel:
return nil
}
}
},
- func(err error) {
+ func(_ error) {
// Wait for any in-progress reloads to complete to avoid
// reloading things after they have been shutdown.
cancel <- struct{}{}
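// For context: the auto-reload branch above re-applies the configuration only when
// its checksum changes. A self-contained sketch of that change-detection idea
// (hashing one file with crypto/sha256 and encoding/hex); the real
// config.GenerateChecksum very likely also covers externally referenced files such
// as rule and scrape config files, so treat this as an illustration only:
func fileChecksum(path string) (string, error) {
	b, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}
	sum := sha256.Sum256(b)
	return hex.EncodeToString(sum[:]), nil
}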
@@ -1135,18 +1262,19 @@ func main() {
return nil
}
- if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
+ if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil {
return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err)
}
reloadReady.Close()
- webHandler.SetReady(true)
- level.Info(logger).Log("msg", "Server is ready to receive web requests.")
+ webHandler.SetReady(web.Ready)
+ notifs.DeleteNotification(notifications.StartingUp)
+ logger.Info("Server is ready to receive web requests.")
<-cancel
return nil
},
- func(err error) {
+ func(_ error) {
close(cancel)
},
)
@@ -1157,7 +1285,7 @@ func main() {
cancel := make(chan struct{})
g.Add(
func() error {
- level.Info(logger).Log("msg", "Starting TSDB ...")
+ logger.Info("Starting TSDB ...")
if cfg.tsdb.WALSegmentSize != 0 {
if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 {
return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
@@ -1176,20 +1304,20 @@ func main() {
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
- level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
+ logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
default:
- level.Info(logger).Log("fs_type", fsType)
+ logger.Info("filesystem information", "fs_type", fsType)
}
- level.Info(logger).Log("msg", "TSDB started")
- level.Debug(logger).Log("msg", "TSDB options",
+ logger.Info("TSDB started")
+ logger.Debug("TSDB options",
"MinBlockDuration", cfg.tsdb.MinBlockDuration,
"MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
"MaxBytes", cfg.tsdb.MaxBytes,
"NoLockfile", cfg.tsdb.NoLockfile,
"RetentionDuration", cfg.tsdb.RetentionDuration,
"WALSegmentSize", cfg.tsdb.WALSegmentSize,
- "WALCompression", cfg.tsdb.WALCompression,
+ "WALCompressionType", cfg.tsdb.WALCompressionType,
)
startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
@@ -1199,9 +1327,9 @@ func main() {
<-cancel
return nil
},
- func(err error) {
+ func(_ error) {
if err := fanoutStorage.Close(); err != nil {
- level.Error(logger).Log("msg", "Error stopping storage", "err", err)
+ logger.Error("Error stopping storage", "err", err)
}
close(cancel)
},
@@ -1213,7 +1341,7 @@ func main() {
cancel := make(chan struct{})
g.Add(
func() error {
- level.Info(logger).Log("msg", "Starting WAL storage ...")
+ logger.Info("Starting WAL storage ...")
if cfg.agent.WALSegmentSize != 0 {
if cfg.agent.WALSegmentSize < 10*1024*1024 || cfg.agent.WALSegmentSize > 256*1024*1024 {
return errors.New("flag 'storage.agent.wal-segment-size' must be set between 10MB and 256MB")
@@ -1232,15 +1360,15 @@ func main() {
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
- level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
+ logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
default:
- level.Info(logger).Log("fs_type", fsType)
+ logger.Info("filesystem information", "fs_type", fsType)
}
- level.Info(logger).Log("msg", "Agent WAL storage started")
- level.Debug(logger).Log("msg", "Agent WAL storage options",
+ logger.Info("Agent WAL storage started")
+ logger.Debug("Agent WAL storage options",
"WALSegmentSize", cfg.agent.WALSegmentSize,
- "WALCompression", cfg.agent.WALCompression,
+ "WALCompressionType", cfg.agent.WALCompressionType,
"StripeSize", cfg.agent.StripeSize,
"TruncateFrequency", cfg.agent.TruncateFrequency,
"MinWALTime", cfg.agent.MinWALTime,
@@ -1254,9 +1382,9 @@ func main() {
<-cancel
return nil
},
- func(e error) {
+ func(_ error) {
if err := fanoutStorage.Close(); err != nil {
- level.Error(logger).Log("msg", "Error stopping storage", "err", err)
+ logger.Error("Error stopping storage", "err", err)
}
close(cancel)
},
@@ -1266,12 +1394,12 @@ func main() {
// Web handler.
g.Add(
func() error {
- if err := webHandler.Run(ctxWeb, listener, *webConfig); err != nil {
+ if err := webHandler.Run(ctxWeb, listeners, *webConfig); err != nil {
return fmt.Errorf("error starting web server: %w", err)
}
return nil
},
- func(err error) {
+ func(_ error) {
cancelWeb()
},
)
@@ -1290,25 +1418,27 @@ func main() {
<-reloadReady.C
notifierManager.Run(discoveryManagerNotify.SyncCh())
- level.Info(logger).Log("msg", "Notifier manager stopped")
+ logger.Info("Notifier manager stopped")
return nil
},
- func(err error) {
+ func(_ error) {
notifierManager.Stop()
},
)
}
- if err := g.Run(); err != nil {
- level.Error(logger).Log("err", err)
- os.Exit(1)
- }
- level.Info(logger).Log("msg", "See you next time!")
+ func() { // This function exists so the top of the stack is named 'main.main.funcxxx' and not 'oklog'.
+ if err := g.Run(); err != nil {
+ logger.Error("Fatal error", "err", err)
+ os.Exit(1)
+ }
+ }()
+ logger.Info("See you next time!")
}
-func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) {
+func openDBWithMetrics(dir string, logger *slog.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) {
db, err := tsdb.Open(
dir,
- log.With(logger, "component", "tsdb"),
+ logger.With("component", "tsdb"),
reg,
opts,
stats,
@@ -1361,21 +1491,23 @@ type reloader struct {
reloader func(*config.Config) error
}
-func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) {
+func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSubqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) {
start := time.Now()
- timings := []interface{}{}
- level.Info(logger).Log("msg", "Loading configuration file", "filename", filename)
+ timingsLogger := logger
+ logger.Info("Loading configuration file", "filename", filename)
defer func() {
if err == nil {
configSuccess.Set(1)
configSuccessTime.SetToCurrentTime()
+ callback(true)
} else {
configSuccess.Set(0)
+ callback(false)
}
}()
- conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger)
+ conf, err := config.LoadFile(filename, agentMode, logger)
if err != nil {
return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err)
}
@@ -1390,18 +1522,26 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
for _, rl := range rls {
rstart := time.Now()
if err := rl.reloader(conf); err != nil {
- level.Error(logger).Log("msg", "Failed to apply configuration", "err", err)
+ logger.Error("Failed to apply configuration", "err", err)
failed = true
}
- timings = append(timings, rl.name, time.Since(rstart))
+ timingsLogger = timingsLogger.With(rl.name, time.Since(rstart))
}
if failed {
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
}
+ updateGoGC(conf, logger)
+
+ noStepSubqueryInterval.Set(conf.GlobalConfig.EvaluationInterval)
+ timingsLogger.Info("Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start))
+ return nil
+}
+
+func updateGoGC(conf *config.Config, logger *slog.Logger) {
oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC)
if oldGoGC != conf.Runtime.GoGC {
- level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
+ logger.Info("updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
}
// Write the new setting out to the ENV var for runtime API output.
if conf.Runtime.GoGC >= 0 {
@@ -1409,11 +1549,6 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
} else {
os.Setenv("GOGC", "off")
}
-
- noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval)
- l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)}
- level.Info(logger).Log(append(l, timings...)...)
- return nil
}
func startsOrEndsWithQuote(s string) bool {
@@ -1566,23 +1701,30 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender {
type notReadyAppender struct{}
-func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
+// SetOptions does nothing in this appender implementation.
+func (n notReadyAppender) SetOptions(_ *storage.AppendOptions) {}
+
+func (n notReadyAppender) Append(_ storage.SeriesRef, _ labels.Labels, _ int64, _ float64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
-func (n notReadyAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) {
+func (n notReadyAppender) AppendExemplar(_ storage.SeriesRef, _ labels.Labels, _ exemplar.Exemplar) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
-func (n notReadyAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
+func (n notReadyAppender) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
-func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) {
+func (n notReadyAppender) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
-func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) {
+func (n notReadyAppender) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) {
+ return 0, tsdb.ErrNotReady
+}
+
+func (n notReadyAppender) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
@@ -1667,7 +1809,7 @@ func (s *readyStorage) WALReplayStatus() (tsdb.WALReplayStatus, error) {
}
// ErrNotReady is returned if the underlying scrape manager is not ready yet.
-var ErrNotReady = errors.New("Scrape manager not ready")
+var ErrNotReady = errors.New("scrape manager not ready")
// ReadyScrapeManager allows a scrape manager to be retrieved. Even if it's set at a later point in time.
type readyScrapeManager struct {
@@ -1703,8 +1845,7 @@ type tsdbOptions struct {
RetentionDuration model.Duration
MaxBytes units.Base2Bytes
NoLockfile bool
- WALCompression bool
- WALCompressionType string
+ WALCompressionType compression.Type
HeadChunksWriteQueueSize int
SamplesPerChunk int
StripeSize int
@@ -1715,6 +1856,10 @@ type tsdbOptions struct {
MaxExemplars int64
EnableMemorySnapshotOnShutdown bool
EnableNativeHistograms bool
+ EnableDelayedCompaction bool
+ CompactionDelayMaxPercent int
+ EnableOverlappingCompaction bool
+ UseUncachedIO bool
}
func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
@@ -1724,7 +1869,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
NoLockfile: opts.NoLockfile,
- WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
+ WALCompression: opts.WALCompressionType,
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
SamplesPerChunk: opts.SamplesPerChunk,
StripeSize: opts.StripeSize,
@@ -1735,7 +1880,10 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown,
EnableNativeHistograms: opts.EnableNativeHistograms,
OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow,
- EnableOverlappingCompaction: true,
+ EnableDelayedCompaction: opts.EnableDelayedCompaction,
+ CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent,
+ EnableOverlappingCompaction: opts.EnableOverlappingCompaction,
+ UseUncachedIO: opts.UseUncachedIO,
}
}
@@ -1743,8 +1891,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
// as agent.Option fields are unit agnostic (time).
type agentOptions struct {
WALSegmentSize units.Base2Bytes
- WALCompression bool
- WALCompressionType string
+ WALCompressionType compression.Type
StripeSize int
TruncateFrequency model.Duration
MinWALTime, MaxWALTime model.Duration
@@ -1758,7 +1905,7 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option
}
return agent.Options{
WALSegmentSize: int(opts.WALSegmentSize),
- WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
+ WALCompression: opts.WALCompressionType,
StripeSize: opts.StripeSize,
TruncateFrequency: time.Duration(opts.TruncateFrequency),
MinWALTime: durationToInt64Millis(time.Duration(opts.MinWALTime)),
@@ -1768,15 +1915,6 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option
}
}
-// discoveryManager interfaces the discovery manager. This is used to keep using
-// the manager that restarts SD's on reload for a few releases until we feel
-// the new manager can be enabled for all users.
-type discoveryManager interface {
- ApplyConfig(cfg map[string]discovery.Configs) error
- Run() error
- SyncCh() <-chan map[string][]*targetgroup.Group
-}
-
// rwProtoMsgFlagParser is a custom parser for config.RemoteWriteProtoMsg enum.
type rwProtoMsgFlagParser struct {
msgs *[]config.RemoteWriteProtoMsg
@@ -1804,10 +1942,8 @@ func (p *rwProtoMsgFlagParser) Set(opt string) error {
if err := t.Validate(); err != nil {
return err
}
- for _, prev := range *p.msgs {
- if prev == t {
- return fmt.Errorf("duplicated %v flag value, got %v already", t, *p.msgs)
- }
+ if slices.Contains(*p.msgs, t) {
+ return fmt.Errorf("duplicated %v flag value, got %v already", t, *p.msgs)
}
*p.msgs = append(*p.msgs, t)
return nil
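// For context: rwProtoMsgFlagParser is a custom kingpin value (it would also need a
// String method, which is outside this hunk), so a hypothetical registration on the
// kingpin application `a` would look roughly like this; the flag name below is
// illustrative:
var protoMsgs []config.RemoteWriteProtoMsg
a.Flag("web.remote-write-receiver.accepted-protobuf-messages",
	"List of the remote write protobuf messages to accept when the remote write receiver is enabled.").
	SetValue(&rwProtoMsgFlagParser{msgs: &protoMsgs})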
diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go
index c827812e60..e4262f1b3b 100644
--- a/cmd/prometheus/main_test.go
+++ b/cmd/prometheus/main_test.go
@@ -20,28 +20,38 @@ import (
"fmt"
"io"
"math"
+ "net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
+ "sync"
"syscall"
"testing"
"time"
"github.com/alecthomas/kingpin/v2"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/rules"
+ "github.com/prometheus/prometheus/util/testutil"
)
+func init() {
+ // This can be removed when the legacy global mode is fully deprecated.
+ //nolint:staticcheck
+ model.NameValidationScheme = model.UTF8Validation
+}
+
const startupTime = 10 * time.Second
var (
@@ -120,6 +130,7 @@ func TestFailedStartupExitCode(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
fakeInputFile := "fake-input-file"
expectedExitStatus := 2
@@ -206,83 +217,139 @@ func TestWALSegmentSizeBounds(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
- for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
- prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
+ for _, tc := range []struct {
+ size string
+ exitCode int
+ }{
+ {
+ size: "9MB",
+ exitCode: 1,
+ },
+ {
+ size: "257MB",
+ exitCode: 1,
+ },
+ {
+ size: "10",
+ exitCode: 2,
+ },
+ {
+ size: "1GB",
+ exitCode: 1,
+ },
+ {
+ size: "12MB",
+ exitCode: 0,
+ },
+ } {
+ t.Run(tc.size, func(t *testing.T) {
+ t.Parallel()
+ prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
- // Log stderr in case of failure.
- stderr, err := prom.StderrPipe()
- require.NoError(t, err)
- go func() {
- slurp, _ := io.ReadAll(stderr)
- t.Log(string(slurp))
- }()
+ // Log stderr in case of failure.
+ stderr, err := prom.StderrPipe()
+ require.NoError(t, err)
- err = prom.Start()
- require.NoError(t, err)
+ // WaitGroup is used to ensure that we don't call t.Log() after the test has finished.
+ var wg sync.WaitGroup
+ wg.Add(1)
+ defer wg.Wait()
- if expectedExitStatus == 0 {
- done := make(chan error, 1)
- go func() { done <- prom.Wait() }()
- select {
- case err := <-done:
- require.Fail(t, "prometheus should be still running: %v", err)
- case <-time.After(startupTime):
- prom.Process.Kill()
- <-done
+ go func() {
+ defer wg.Done()
+ slurp, _ := io.ReadAll(stderr)
+ t.Log(string(slurp))
+ }()
+
+ err = prom.Start()
+ require.NoError(t, err)
+
+ if tc.exitCode == 0 {
+ done := make(chan error, 1)
+ go func() { done <- prom.Wait() }()
+ select {
+ case err := <-done:
+ t.Fatalf("prometheus should be still running: %v", err)
+ case <-time.After(startupTime):
+ prom.Process.Kill()
+ <-done
+ }
+ return
}
- continue
- }
- err = prom.Wait()
- require.Error(t, err)
- var exitError *exec.ExitError
- require.ErrorAs(t, err, &exitError)
- status := exitError.Sys().(syscall.WaitStatus)
- require.Equal(t, expectedExitStatus, status.ExitStatus())
+ err = prom.Wait()
+ require.Error(t, err)
+ var exitError *exec.ExitError
+ require.ErrorAs(t, err, &exitError)
+ status := exitError.Sys().(syscall.WaitStatus)
+ require.Equal(t, tc.exitCode, status.ExitStatus())
+ })
}
}
func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) {
- t.Parallel()
-
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
- for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} {
- prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
+ for _, tc := range []struct {
+ size string
+ exitCode int
+ }{
+ {
+ size: "512KB",
+ exitCode: 1,
+ },
+ {
+ size: "1MB",
+ exitCode: 0,
+ },
+ } {
+ t.Run(tc.size, func(t *testing.T) {
+ t.Parallel()
+ prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
- // Log stderr in case of failure.
- stderr, err := prom.StderrPipe()
- require.NoError(t, err)
- go func() {
- slurp, _ := io.ReadAll(stderr)
- t.Log(string(slurp))
- }()
+ // Log stderr in case of failure.
+ stderr, err := prom.StderrPipe()
+ require.NoError(t, err)
- err = prom.Start()
- require.NoError(t, err)
+ // WaitGroup is used to ensure that we don't call t.Log() after the test has finished.
+ var wg sync.WaitGroup
+ wg.Add(1)
+ defer wg.Wait()
- if expectedExitStatus == 0 {
- done := make(chan error, 1)
- go func() { done <- prom.Wait() }()
- select {
- case err := <-done:
- require.Fail(t, "prometheus should be still running: %v", err)
- case <-time.After(startupTime):
- prom.Process.Kill()
- <-done
+ go func() {
+ defer wg.Done()
+ slurp, _ := io.ReadAll(stderr)
+ t.Log(string(slurp))
+ }()
+
+ err = prom.Start()
+ require.NoError(t, err)
+
+ if tc.exitCode == 0 {
+ done := make(chan error, 1)
+ go func() { done <- prom.Wait() }()
+ select {
+ case err := <-done:
+ t.Fatalf("prometheus should be still running: %v", err)
+ case <-time.After(startupTime):
+ prom.Process.Kill()
+ <-done
+ }
+ return
}
- continue
- }
- err = prom.Wait()
- require.Error(t, err)
- var exitError *exec.ExitError
- require.ErrorAs(t, err, &exitError)
- status := exitError.Sys().(syscall.WaitStatus)
- require.Equal(t, expectedExitStatus, status.ExitStatus())
+ err = prom.Wait()
+ require.Error(t, err)
+ var exitError *exec.ExitError
+ require.ErrorAs(t, err, &exitError)
+ status := exitError.Sys().(syscall.WaitStatus)
+ require.Equal(t, tc.exitCode, status.ExitStatus())
+ })
}
}
@@ -290,7 +357,7 @@ func TestTimeMetrics(t *testing.T) {
tmpDir := t.TempDir()
reg := prometheus.NewRegistry()
- db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil)
+ db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
@@ -348,7 +415,9 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
}
func TestAgentSuccessfulStartup(t *testing.T) {
- prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
+ t.Parallel()
+
+ prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@@ -366,7 +435,9 @@ func TestAgentSuccessfulStartup(t *testing.T) {
}
func TestAgentFailedStartupWithServerFlag(t *testing.T) {
- prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
+ t.Parallel()
+
+ prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
output := bytes.Buffer{}
prom.Stderr = &output
@@ -393,7 +464,9 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) {
}
func TestAgentFailedStartupWithInvalidConfig(t *testing.T) {
- prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
+ t.Parallel()
+
+ prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@@ -414,6 +487,7 @@ func TestModeSpecificFlags(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
testcases := []struct {
mode string
@@ -428,10 +502,11 @@ func TestModeSpecificFlags(t *testing.T) {
for _, tc := range testcases {
t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) {
+ t.Parallel()
args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"}
if tc.mode == "agent" {
- args = append(args, "--enable-feature=agent", "--config.file="+agentConfig)
+ args = append(args, "--agent", "--config.file="+agentConfig)
} else {
args = append(args, "--config.file="+promConfig)
}
@@ -441,7 +516,14 @@ func TestModeSpecificFlags(t *testing.T) {
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
+
+ // WaitGroup is used to ensure that we don't call t.Log() after the test has finished.
+ var wg sync.WaitGroup
+ wg.Add(1)
+ defer wg.Wait()
+
go func() {
+ defer wg.Done()
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
@@ -479,6 +561,8 @@ func TestDocumentation(t *testing.T) {
if runtime.GOOS == "windows" {
t.SkipNow()
}
+ t.Parallel()
+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
@@ -503,6 +587,8 @@ func TestDocumentation(t *testing.T) {
}
func TestRwProtoMsgFlagParser(t *testing.T) {
+ t.Parallel()
+
defaultOpts := config.RemoteWriteProtoMsgs{
config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2,
}
@@ -563,3 +649,236 @@ func TestRwProtoMsgFlagParser(t *testing.T) {
})
}
}
+
+// reloadPrometheusConfig sends a reload request to the Prometheus server to apply
+// the updated configuration.
+func reloadPrometheusConfig(t *testing.T, reloadURL string) {
+ t.Helper()
+
+ r, err := http.Post(reloadURL, "text/plain", nil)
+ require.NoError(t, err, "Failed to reload Prometheus")
+ require.Equal(t, http.StatusOK, r.StatusCode, "Unexpected status code when reloading Prometheus")
+}
+
+func getMetricValue(t *testing.T, body io.Reader, metricType model.MetricType, metricName string) (float64, error) {
+ t.Helper()
+
+ p := expfmt.TextParser{}
+ metricFamilies, err := p.TextToMetricFamilies(body)
+ if err != nil {
+ return 0, err
+ }
+ metricFamily, ok := metricFamilies[metricName]
+ if !ok {
+ return 0, errors.New("metric family not found")
+ }
+ metric := metricFamily.GetMetric()
+ if len(metric) != 1 {
+ return 0, errors.New("metric not found")
+ }
+ switch metricType {
+ case model.MetricTypeGauge:
+ return metric[0].GetGauge().GetValue(), nil
+ case model.MetricTypeCounter:
+ return metric[0].GetCounter().GetValue(), nil
+ default:
+ t.Fatalf("metric type %s not supported", metricType)
+ }
+
+ return 0, errors.New("cannot get value")
+}
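// For context: a typical call site for the helper above inside a test, reading one
// gauge from a locally scraped /metrics page; the URL and metric name are illustrative.
resp, err := http.Get("http://127.0.0.1:9090/metrics")
require.NoError(t, err)
defer resp.Body.Close()
gogc, err := getMetricValue(t, resp.Body, model.MetricTypeGauge, "go_gc_gogc_percent")
require.NoError(t, err)
require.Positive(t, gogc)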
+
+func TestRuntimeGOGCConfig(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping test in short mode.")
+ }
+ t.Parallel()
+
+ for _, tc := range []struct {
+ name string
+ config string
+ gogcEnvVar string
+ expectedGOGC float64
+ }{
+ {
+ name: "empty config file",
+ expectedGOGC: 75,
+ },
+ {
+ name: "empty config file with GOGC env var set",
+ gogcEnvVar: "66",
+ expectedGOGC: 66,
+ },
+ {
+ name: "gogc set through config",
+ config: `
+runtime:
+ gogc: 77`,
+ expectedGOGC: 77.0,
+ },
+ {
+ name: "gogc set through config and env var",
+ config: `
+runtime:
+ gogc: 77`,
+ gogcEnvVar: "88",
+ expectedGOGC: 77.0,
+ },
+ {
+ name: "incomplete runtime block",
+ config: `
+runtime:`,
+ expectedGOGC: 75.0,
+ },
+ {
+ name: "incomplete runtime block and GOGC env var set",
+ config: `
+runtime:`,
+ gogcEnvVar: "88",
+ expectedGOGC: 88.0,
+ },
+ {
+ name: "unrelated config and GOGC env var set",
+ config: `
+global:
+ scrape_interval: 500ms`,
+ gogcEnvVar: "80",
+ expectedGOGC: 80,
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ tmpDir := t.TempDir()
+ configFile := filepath.Join(tmpDir, "prometheus.yml")
+
+ port := testutil.RandomUnprivilegedPort(t)
+ os.WriteFile(configFile, []byte(tc.config), 0o777)
+ prom := prometheusCommandWithLogging(
+ t,
+ configFile,
+ port,
+ fmt.Sprintf("--storage.tsdb.path=%s", tmpDir),
+ "--web.enable-lifecycle",
+ )
+ // Inject GOGC when set.
+ prom.Env = os.Environ()
+ if tc.gogcEnvVar != "" {
+ prom.Env = append(prom.Env, fmt.Sprintf("GOGC=%s", tc.gogcEnvVar))
+ }
+ require.NoError(t, prom.Start())
+
+ ensureGOGCValue := func(val float64) {
+ var (
+ r *http.Response
+ err error
+ )
+ // Wait for the /metrics endpoint to be ready.
+ require.Eventually(t, func() bool {
+ r, err = http.Get(fmt.Sprintf("http://127.0.0.1:%d/metrics", port))
+ if err != nil {
+ return false
+ }
+ return r.StatusCode == http.StatusOK
+ }, 5*time.Second, 50*time.Millisecond)
+ defer r.Body.Close()
+
+ // Check the final GOGC value that is set, using go_gc_gogc_percent from /metrics as the source of truth.
+ gogc, err := getMetricValue(t, r.Body, model.MetricTypeGauge, "go_gc_gogc_percent")
+ require.NoError(t, err)
+ require.Equal(t, val, gogc)
+ }
+
+ // The value is applied on startup.
+ ensureGOGCValue(tc.expectedGOGC)
+
+ // After a reload with the same config, the value stays the same.
+ reloadURL := fmt.Sprintf("http://127.0.0.1:%d/-/reload", port)
+ reloadPrometheusConfig(t, reloadURL)
+ ensureGOGCValue(tc.expectedGOGC)
+
+ // After a reload with a different config, the value gets updated.
+ newConfig := `
+runtime:
+ gogc: 99`
+ os.WriteFile(configFile, []byte(newConfig), 0o777)
+ reloadPrometheusConfig(t, reloadURL)
+ ensureGOGCValue(99.0)
+ })
+ }
+}
+
+// TestHeadCompactionWhileScraping verifies that running a head compaction
+// concurrently with a scrape does not trigger the data race described in
+// https://github.com/prometheus/prometheus/issues/16490.
+func TestHeadCompactionWhileScraping(t *testing.T) {
+ t.Parallel()
+
+ // Run the scenario several times to increase the chance of reproducing the data race.
+ for i := range 5 {
+ t.Run(strconv.Itoa(i), func(t *testing.T) {
+ t.Parallel()
+
+ tmpDir := t.TempDir()
+ configFile := filepath.Join(tmpDir, "prometheus.yml")
+
+ port := testutil.RandomUnprivilegedPort(t)
+ config := fmt.Sprintf(`
+scrape_configs:
+ - job_name: 'self1'
+ scrape_interval: 61ms
+ static_configs:
+ - targets: ['localhost:%d']
+ - job_name: 'self2'
+ scrape_interval: 67ms
+ static_configs:
+ - targets: ['localhost:%d']
+`, port, port)
+ os.WriteFile(configFile, []byte(config), 0o777)
+
+ prom := prometheusCommandWithLogging(
+ t,
+ configFile,
+ port,
+ fmt.Sprintf("--storage.tsdb.path=%s", tmpDir),
+ "--storage.tsdb.min-block-duration=100ms",
+ )
+ require.NoError(t, prom.Start())
+
+ require.Eventually(t, func() bool {
+ r, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/metrics", port))
+ if err != nil {
+ return false
+ }
+ defer r.Body.Close()
+ if r.StatusCode != http.StatusOK {
+ return false
+ }
+ metrics, err := io.ReadAll(r.Body)
+ if err != nil {
+ return false
+ }
+
+ // Wait for some compactions to run
+ compactions, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_compactions_total")
+ if err != nil {
+ return false
+ }
+ if compactions < 3 {
+ return false
+ }
+
+ // Sanity check: Some actual scraping was done.
+ series, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_head_series_created_total")
+ require.NoError(t, err)
+ require.NotZero(t, series)
+
+ // Ensure no compactions have failed.
+ failures, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeCounter, "prometheus_tsdb_compactions_failed_total")
+ require.NoError(t, err)
+ require.Zero(t, failures)
+ return true
+ }, 15*time.Second, 500*time.Millisecond)
+ })
+ }
+}
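// For context: prometheusCommandWithLogging is not part of this excerpt. Based on how
// the tests above call it, a sketch of such a helper might look like the following;
// the real one in main_test.go may differ in flags and teardown:
func prometheusCommandWithLogging(t *testing.T, configFile string, port int, extraArgs ...string) *exec.Cmd {
	t.Helper()

	args := append([]string{
		"-test.main",
		"--config.file=" + configFile,
		fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port),
	}, extraArgs...)
	prom := exec.Command(promPath, args...)

	// Capture stderr and surface it through the test log during teardown.
	var stderr bytes.Buffer
	prom.Stderr = &stderr
	t.Cleanup(func() {
		if prom.Process != nil {
			_ = prom.Process.Kill()
			_ = prom.Wait()
		}
		t.Log(stderr.String())
	})
	return prom
}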
diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go
index 2011fb123f..94eec27e79 100644
--- a/cmd/prometheus/main_unix_test.go
+++ b/cmd/prometheus/main_unix_test.go
@@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t))
diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go
index 62e317bf8b..7c073b59d0 100644
--- a/cmd/prometheus/query_log_test.go
+++ b/cmd/prometheus/query_log_test.go
@@ -88,20 +88,13 @@ func (p *queryLogTest) setQueryLog(t *testing.T, queryLogFile string) {
_, err = p.configFile.Seek(0, 0)
require.NoError(t, err)
if queryLogFile != "" {
- _, err = p.configFile.Write([]byte(fmt.Sprintf("global:\n query_log_file: %s\n", queryLogFile)))
+ _, err = fmt.Fprintf(p.configFile, "global:\n query_log_file: %s\n", queryLogFile)
require.NoError(t, err)
}
_, err = p.configFile.Write([]byte(p.configuration()))
require.NoError(t, err)
}
-// reloadConfig reloads the configuration using POST.
-func (p *queryLogTest) reloadConfig(t *testing.T) {
- r, err := http.Post(fmt.Sprintf("http://%s:%d%s/-/reload", p.host, p.port, p.prefix), "text/plain", nil)
- require.NoError(t, err)
- require.Equal(t, 200, r.StatusCode)
-}
-
// query runs a query according to the test origin.
func (p *queryLogTest) query(t *testing.T) {
switch p.origin {
@@ -125,12 +118,61 @@ func (p *queryLogTest) query(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 200, r.StatusCode)
case ruleOrigin:
- time.Sleep(2 * time.Second)
+ // Poll the /api/v1/rules endpoint until a new rule evaluation is detected.
+ var lastEvalTime time.Time
+ for {
+ r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port))
+ require.NoError(t, err)
+
+ rulesBody, err := io.ReadAll(r.Body)
+ require.NoError(t, err)
+ defer r.Body.Close()
+
+ // Parse the rules response to find the last evaluation time.
+ newEvalTime := parseLastEvaluation(rulesBody)
+ if newEvalTime.After(lastEvalTime) {
+ if !lastEvalTime.IsZero() {
+ break
+ }
+ lastEvalTime = newEvalTime
+ }
+
+ time.Sleep(100 * time.Millisecond)
+ }
default:
panic("can't query this origin")
}
}
+// parseLastEvaluation extracts the lastEvaluation timestamp of the first rule found in the /api/v1/rules response.
+func parseLastEvaluation(rulesBody []byte) time.Time {
+ var ruleResponse struct {
+ Status string `json:"status"`
+ Data struct {
+ Groups []struct {
+ Rules []struct {
+ LastEvaluation string `json:"lastEvaluation"`
+ } `json:"rules"`
+ } `json:"groups"`
+ } `json:"data"`
+ }
+
+ err := json.Unmarshal(rulesBody, &ruleResponse)
+ if err != nil {
+ return time.Time{}
+ }
+
+ for _, group := range ruleResponse.Data.Groups {
+ for _, rule := range group.Rules {
+ if evalTime, err := time.Parse(time.RFC3339Nano, rule.LastEvaluation); err == nil {
+ return evalTime
+ }
+ }
+ }
+
+ return time.Time{}
+}
+
// queryString returns the expected queryString of this test.
func (p *queryLogTest) queryString() string {
switch p.origin {
@@ -259,6 +301,7 @@ func (p *queryLogTest) run(t *testing.T) {
}, p.params()...)
prom := exec.Command(promPath, params...)
+ reloadURL := fmt.Sprintf("http://%s:%d%s/-/reload", p.host, p.port, p.prefix)
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
@@ -286,7 +329,7 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
require.Empty(t, readQueryLog(t, queryLogFile.Name()))
p.setQueryLog(t, queryLogFile.Name())
- p.reloadConfig(t)
+ reloadPrometheusConfig(t, reloadURL)
}
p.query(t)
@@ -301,7 +344,7 @@ func (p *queryLogTest) run(t *testing.T) {
p.validateLastQuery(t, ql)
p.setQueryLog(t, "")
- p.reloadConfig(t)
+ reloadPrometheusConfig(t, reloadURL)
if !p.exactQueryCount() {
qc = len(readQueryLog(t, queryLogFile.Name()))
}
@@ -313,7 +356,7 @@ func (p *queryLogTest) run(t *testing.T) {
qc = len(ql)
p.setQueryLog(t, queryLogFile.Name())
- p.reloadConfig(t)
+ reloadPrometheusConfig(t, reloadURL)
p.query(t)
qc++
@@ -322,7 +365,7 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
- require.Greater(t, len(ql), qc, "no queries logged")
+ require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
qc = len(ql)
@@ -353,11 +396,11 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
- require.Greater(t, len(ql), qc, "no queries logged")
+ require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
- p.reloadConfig(t)
+ reloadPrometheusConfig(t, reloadURL)
p.query(t)
@@ -393,6 +436,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine {
file, err := os.Open(path)
require.NoError(t, err)
defer file.Close()
+
scanner := bufio.NewScanner(file)
for scanner.Scan() {
var q queryLogLine
@@ -406,6 +450,7 @@ func TestQueryLog(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
cwd, err := os.Getwd()
require.NoError(t, err)
@@ -424,6 +469,7 @@ func TestQueryLog(t *testing.T) {
}
t.Run(p.String(), func(t *testing.T) {
+ t.Parallel()
p.run(t)
})
}
diff --git a/cmd/prometheus/reload_test.go b/cmd/prometheus/reload_test.go
new file mode 100644
index 0000000000..c59e51b316
--- /dev/null
+++ b/cmd/prometheus/reload_test.go
@@ -0,0 +1,235 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "bufio"
+ "encoding/json"
+ "io"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+
+ "github.com/prometheus/prometheus/util/testutil"
+)
+
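+// configReloadMetric is 1 when the last configuration reload succeeded and 0 otherwise.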
+const configReloadMetric = "prometheus_config_last_reload_successful"
+
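+// TestAutoReloadConfig_ValidToValid checks that successive valid configurations are
+// picked up automatically when the auto-reload-config feature is enabled.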
+func TestAutoReloadConfig_ValidToValid(t *testing.T) {
+ steps := []struct {
+ configText string
+ expectedInterval string
+ expectedMetric float64
+ }{
+ {
+ configText: `
+global:
+ scrape_interval: 30s
+`,
+ expectedInterval: "30s",
+ expectedMetric: 1,
+ },
+ {
+ configText: `
+global:
+ scrape_interval: 15s
+`,
+ expectedInterval: "15s",
+ expectedMetric: 1,
+ },
+ {
+ configText: `
+global:
+ scrape_interval: 30s
+`,
+ expectedInterval: "30s",
+ expectedMetric: 1,
+ },
+ }
+
+ runTestSteps(t, steps)
+}
+
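+// TestAutoReloadConfig_ValidToInvalidToValid checks that an invalid configuration is
+// rejected (the reload metric drops to 0 and the previous scrape interval stays in
+// effect) and that a subsequent valid configuration is applied again.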
+func TestAutoReloadConfig_ValidToInvalidToValid(t *testing.T) {
+ steps := []struct {
+ configText string
+ expectedInterval string
+ expectedMetric float64
+ }{
+ {
+ configText: `
+global:
+ scrape_interval: 30s
+`,
+ expectedInterval: "30s",
+ expectedMetric: 1,
+ },
+ {
+ configText: `
+global:
+ scrape_interval: 15s
+invalid_syntax
+`,
+ expectedInterval: "30s",
+ expectedMetric: 0,
+ },
+ {
+ configText: `
+global:
+ scrape_interval: 30s
+`,
+ expectedInterval: "30s",
+ expectedMetric: 1,
+ },
+ }
+
+ runTestSteps(t, steps)
+}
+
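+// runTestSteps starts Prometheus with --enable-feature=auto-reload-config and a 1s
+// auto-reload interval, then writes each step's config to disk and waits for the
+// expected scrape interval and reload metric to be observed.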
+func runTestSteps(t *testing.T, steps []struct {
+ configText string
+ expectedInterval string
+ expectedMetric float64
+},
+) {
+ configDir := t.TempDir()
+ configFilePath := filepath.Join(configDir, "prometheus.yml")
+
+ t.Logf("Config file path: %s", configFilePath)
+
+ require.NoError(t, os.WriteFile(configFilePath, []byte(steps[0].configText), 0o644), "Failed to write initial config file")
+
+ port := testutil.RandomUnprivilegedPort(t)
+ prom := prometheusCommandWithLogging(t, configFilePath, port, "--enable-feature=auto-reload-config", "--config.auto-reload-interval=1s")
+ require.NoError(t, prom.Start())
+
+ baseURL := "http://localhost:" + strconv.Itoa(port)
+ require.Eventually(t, func() bool {
+ resp, err := http.Get(baseURL + "/-/ready")
+ if err != nil {
+ return false
+ }
+ defer resp.Body.Close()
+ return resp.StatusCode == http.StatusOK
+ }, 5*time.Second, 100*time.Millisecond, "Prometheus didn't become ready in time")
+
+ for i, step := range steps {
+ t.Logf("Step %d", i)
+ require.NoError(t, os.WriteFile(configFilePath, []byte(step.configText), 0o644), "Failed to write config file for step")
+
+ require.Eventually(t, func() bool {
+ return verifyScrapeInterval(t, baseURL, step.expectedInterval) &&
+ verifyConfigReloadMetric(t, baseURL, step.expectedMetric)
+ }, 10*time.Second, 500*time.Millisecond, "Prometheus config reload didn't happen in time")
+ }
+}
+
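+// verifyScrapeInterval reports whether the YAML returned by /api/v1/status/config
+// contains the expected scrape_interval.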
+func verifyScrapeInterval(t *testing.T, baseURL, expectedInterval string) bool {
+ resp, err := http.Get(baseURL + "/api/v1/status/config")
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ require.NoError(t, err)
+
+ config := struct {
+ Data struct {
+ YAML string `json:"yaml"`
+ } `json:"data"`
+ }{}
+
+ require.NoError(t, json.Unmarshal(body, &config))
+ return strings.Contains(config.Data.YAML, "scrape_interval: "+expectedInterval)
+}
+
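+// verifyConfigReloadMetric scrapes /metrics and reports whether the config reload
+// metric is present with the expected value.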
+func verifyConfigReloadMetric(t *testing.T, baseURL string, expectedValue float64) bool {
+ resp, err := http.Get(baseURL + "/metrics")
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ require.NoError(t, err)
+
+ lines := string(body)
+ var actualValue float64
+ found := false
+
+ for _, line := range strings.Split(lines, "\n") {
+ if strings.HasPrefix(line, configReloadMetric) {
+ parts := strings.Fields(line)
+ if len(parts) >= 2 {
+ actualValue, err = strconv.ParseFloat(parts[1], 64)
+ require.NoError(t, err)
+ found = true
+ break
+ }
+ }
+ }
+
+ return found && actualValue == expectedValue
+}
+
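+// captureLogsToTLog copies each line read from r into the test log so that
+// Prometheus output is visible when a test fails.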
+func captureLogsToTLog(t *testing.T, r io.Reader) {
+ scanner := bufio.NewScanner(r)
+ for scanner.Scan() {
+ t.Log(scanner.Text())
+ }
+ if err := scanner.Err(); err != nil {
+ t.Logf("Error reading logs: %v", err)
+ }
+}
+
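+// prometheusCommandWithLogging builds an exec.Cmd for the Prometheus test binary
+// with the given config file and listen port, forwards its stdout/stderr to the
+// test log, and registers a cleanup that kills the process and waits for the log
+// forwarding to finish.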
+func prometheusCommandWithLogging(t *testing.T, configFilePath string, port int, extraArgs ...string) *exec.Cmd {
+ stdoutPipe, stdoutWriter := io.Pipe()
+ stderrPipe, stderrWriter := io.Pipe()
+
+ var wg sync.WaitGroup
+ wg.Add(2)
+
+ args := []string{
+ "-test.main",
+ "--config.file=" + configFilePath,
+ "--web.listen-address=0.0.0.0:" + strconv.Itoa(port),
+ }
+ args = append(args, extraArgs...)
+ prom := exec.Command(promPath, args...)
+ prom.Stdout = stdoutWriter
+ prom.Stderr = stderrWriter
+
+ go func() {
+ defer wg.Done()
+ captureLogsToTLog(t, stdoutPipe)
+ }()
+ go func() {
+ defer wg.Done()
+ captureLogsToTLog(t, stderrPipe)
+ }()
+
+ t.Cleanup(func() {
+ prom.Process.Kill()
+ prom.Wait()
+ stdoutWriter.Close()
+ stderrWriter.Close()
+ wg.Wait()
+ })
+ return prom
+}
diff --git a/cmd/prometheus/scrape_failure_log_test.go b/cmd/prometheus/scrape_failure_log_test.go
new file mode 100644
index 0000000000..f35cb7bee6
--- /dev/null
+++ b/cmd/prometheus/scrape_failure_log_test.go
@@ -0,0 +1,186 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "net/url"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+ "go.uber.org/atomic"
+
+ "github.com/prometheus/prometheus/util/testutil"
+)
+
+func TestScrapeFailureLogFile(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping test in short mode.")
+ }
+
+ // Tracks the number of requests made to the mock server.
+ var requestCount atomic.Int32
+
+ // Starts a server that always returns HTTP 500 errors.
+ mockServerAddress := startGarbageServer(t, &requestCount)
+
+ // Create a temporary directory for Prometheus configuration and logs.
+ tempDir := t.TempDir()
+
+ // Define file paths for the scrape failure log and Prometheus configuration.
+ // Like other files, the scrape failure log file should be relative to the
+ // config file. Therefore, we keep the name written into the config separate
+ // from the full path used to check the file's content.
+ scrapeFailureLogFileName := "scrape_failure.log"
+ scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName)
+ promConfigFile := filepath.Join(tempDir, "prometheus.yml")
+
+ // Step 1: Set up an initial Prometheus configuration that globally
+ // specifies a scrape failure log file.
+ promConfig := fmt.Sprintf(`
+global:
+ scrape_interval: 500ms
+ scrape_failure_log_file: %s
+
+scrape_configs:
+ - job_name: 'test_job'
+ static_configs:
+ - targets: ['%s']
+`, scrapeFailureLogFileName, mockServerAddress)
+
+ err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
+ require.NoError(t, err, "Failed to write Prometheus configuration file")
+
+ // Start Prometheus with the generated configuration and a random port, enabling the lifecycle API.
+ port := testutil.RandomUnprivilegedPort(t)
+ params := []string{
+ "-test.main",
+ "--config.file=" + promConfigFile,
+ "--storage.tsdb.path=" + filepath.Join(tempDir, "data"),
+ fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port),
+ "--web.enable-lifecycle",
+ }
+ prometheusProcess := exec.Command(promPath, params...)
+ prometheusProcess.Stdout = os.Stdout
+ prometheusProcess.Stderr = os.Stderr
+
+ err = prometheusProcess.Start()
+ require.NoError(t, err, "Failed to start Prometheus")
+ defer prometheusProcess.Process.Kill()
+
+ // Wait until the mock server receives at least two requests from Prometheus.
+ require.Eventually(t, func() bool {
+ return requestCount.Load() >= 2
+ }, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server")
+
+ // Verify that the scrape failures have been logged to the specified file.
+ content, err := os.ReadFile(scrapeFailureLogFile)
+ require.NoError(t, err, "Failed to read scrape failure log")
+ require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found")
+
+ // Step 2: Update the Prometheus configuration to remove the scrape failure
+ // log file setting.
+ promConfig = fmt.Sprintf(`
+global:
+ scrape_interval: 1s
+
+scrape_configs:
+ - job_name: 'test_job'
+ static_configs:
+ - targets: ['%s']
+`, mockServerAddress)
+
+ err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
+ require.NoError(t, err, "Failed to update Prometheus configuration file")
+
+ // Reload Prometheus with the updated configuration.
+ reloadURL := fmt.Sprintf("http://127.0.0.1:%d/-/reload", port)
+ reloadPrometheusConfig(t, reloadURL)
+
+ // Count the number of lines in the scrape failure log file before any
+ // further requests.
+ preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
+
+ // Wait for at least two more requests to the mock server to ensure
+ // Prometheus continues scraping.
+ requestsBeforeReload := requestCount.Load()
+ require.Eventually(t, func() bool {
+ return requestCount.Load() >= requestsBeforeReload+2
+ }, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload")
+
+ // Ensure that no new lines were added to the scrape failure log file after
+ // the configuration change.
+ require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting")
+
+ // Step 3: Re-add the scrape failure log file setting, but this time under
+ // scrape_configs, and reload Prometheus.
+ promConfig = fmt.Sprintf(`
+global:
+ scrape_interval: 1s
+
+scrape_configs:
+ - job_name: 'test_job'
+ scrape_failure_log_file: %s
+ static_configs:
+ - targets: ['%s']
+`, scrapeFailureLogFileName, mockServerAddress)
+
+ err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
+ require.NoError(t, err, "Failed to update Prometheus configuration file")
+
+ // Reload Prometheus with the updated configuration.
+ reloadPrometheusConfig(t, reloadURL)
+
+ // Wait for at least two more requests to the mock server and verify that
+ // new log entries are created.
+ postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
+ requestsBeforeReAddingLog := requestCount.Load()
+ require.Eventually(t, func() bool {
+ return requestCount.Load() >= requestsBeforeReAddingLog+2
+ }, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting")
+
+ // Confirm that new lines were added to the scrape failure log file.
+ require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting")
+}
+
+// startGarbageServer sets up a mock server that returns a 500 Internal Server Error
+// for all requests. It also increments the request count each time it's hit.
+func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+ requestCount.Inc()
+ w.WriteHeader(http.StatusInternalServerError)
+ }))
+ t.Cleanup(server.Close)
+
+ parsedURL, err := url.Parse(server.URL)
+ require.NoError(t, err, "Failed to parse mock server URL")
+
+ return parsedURL.Host
+}
+
+// countLinesInFile counts and returns the number of lines in the specified file.
+func countLinesInFile(filePath string) int {
+ data, err := os.ReadFile(filePath)
+ if err != nil {
+ return 0 // Return 0 if the file doesn't exist or can't be read.
+ }
+ return bytes.Count(data, []byte{'\n'})
+}
diff --git a/cmd/promtool/analyze.go b/cmd/promtool/analyze.go
index c1f523de52..26e6f2188c 100644
--- a/cmd/promtool/analyze.go
+++ b/cmd/promtool/analyze.go
@@ -34,8 +34,8 @@ import (
)
var (
- errNotNativeHistogram = fmt.Errorf("not a native histogram")
- errNotEnoughData = fmt.Errorf("not enough data")
+ errNotNativeHistogram = errors.New("not a native histogram")
+ errNotEnoughData = errors.New("not enough data")
outputHeader = `Bucket stats for each histogram series over time
------------------------------------------------
@@ -169,7 +169,7 @@ func querySamples(ctx context.Context, api v1.API, query string, end time.Time)
matrix, ok := values.(model.Matrix)
if !ok {
- return nil, fmt.Errorf("query of buckets resulted in non-Matrix")
+ return nil, errors.New("query of buckets resulted in non-Matrix")
}
return matrix, nil
@@ -259,7 +259,7 @@ func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int,
prev := matrix[i].Values[timeIdx]
// Assume the results are nicely aligned.
if curr.Timestamp != prev.Timestamp {
- return counts, fmt.Errorf("matrix result is not time aligned")
+ return counts, errors.New("matrix result is not time aligned")
}
counts[i+1] = int(curr.Value - prev.Value)
}
diff --git a/cmd/promtool/analyze_test.go b/cmd/promtool/analyze_test.go
index 83d2ac4a3d..3de4283a15 100644
--- a/cmd/promtool/analyze_test.go
+++ b/cmd/promtool/analyze_test.go
@@ -17,9 +17,8 @@ import (
"fmt"
"testing"
- "github.com/stretchr/testify/require"
-
"github.com/prometheus/common/model"
+ "github.com/stretchr/testify/require"
)
var (
@@ -109,6 +108,7 @@ func init() {
}
func TestGetBucketCountsAtTime(t *testing.T) {
+ t.Parallel()
cases := []struct {
matrix model.Matrix
length int
@@ -137,6 +137,7 @@ func TestGetBucketCountsAtTime(t *testing.T) {
for _, c := range cases {
t.Run(fmt.Sprintf("exampleMatrix@%d", c.timeIdx), func(t *testing.T) {
+ t.Parallel()
res, err := getBucketCountsAtTime(c.matrix, c.length, c.timeIdx)
require.NoError(t, err)
require.Equal(t, c.expected, res)
@@ -145,6 +146,7 @@ func TestGetBucketCountsAtTime(t *testing.T) {
}
func TestCalcClassicBucketStatistics(t *testing.T) {
+ t.Parallel()
cases := []struct {
matrix model.Matrix
expected *statistics
@@ -162,6 +164,7 @@ func TestCalcClassicBucketStatistics(t *testing.T) {
for i, c := range cases {
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
+ t.Parallel()
res, err := calcClassicBucketStatistics(c.matrix)
require.NoError(t, err)
require.Equal(t, c.expected, res)
diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go
index 400cae421a..47de3b5c1c 100644
--- a/cmd/promtool/backfill.go
+++ b/cmd/promtool/backfill.go
@@ -21,8 +21,8 @@ import (
"math"
"time"
- "github.com/go-kit/log"
- "github.com/oklog/ulid"
+ "github.com/oklog/ulid/v2"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/textparse"
@@ -48,7 +48,7 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
_, ts, _ := p.Series()
if ts == nil {
- return 0, 0, fmt.Errorf("expected timestamp for series got none")
+ return 0, 0, errors.New("expected timestamp for series got none")
}
if *ts > maxt {
@@ -85,7 +85,7 @@ func getCompatibleBlockDuration(maxBlockDuration int64) int64 {
return blockDuration
}
-func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
+func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool, customLabels map[string]string) (returnErr error) {
blockDuration := getCompatibleBlockDuration(maxBlockDuration)
mint = blockDuration * (mint / blockDuration)
@@ -102,6 +102,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
nextSampleTs int64 = math.MaxInt64
)
+ lb := labels.NewBuilder(labels.EmptyLabels())
+
for t := mint; t <= maxt; t += blockDuration {
tsUpper := t + blockDuration
if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
@@ -118,7 +120,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add sample in the
// original interval later.
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration)
+ w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("block writer: %w", err)
}
@@ -146,7 +148,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
_, ts, v := p.Series()
if ts == nil {
l := labels.Labels{}
- p.Metric(&l)
+ p.Labels(&l)
return fmt.Errorf("expected timestamp for series %v, got none", l)
}
if *ts < t {
@@ -160,9 +162,15 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
}
l := labels.Labels{}
- p.Metric(&l)
+ p.Labels(&l)
- if _, err := app.Append(0, l, *ts, v); err != nil {
+ lb.Reset(l)
+ for name, value := range customLabels {
+ lb.Set(name, value)
+ }
+ lbls := lb.Labels()
+
+ if _, err := app.Append(0, lbls, *ts, v); err != nil {
return fmt.Errorf("add sample: %w", err)
}
@@ -221,13 +229,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
return nil
}
-func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
+func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) (err error) {
p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return fmt.Errorf("getting min and max timestamp: %w", err)
}
- if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet); err != nil {
+ if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet, customLabels); err != nil {
return fmt.Errorf("block creation: %w", err)
}
return nil
diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go
index 32abfa46a8..8a599510a9 100644
--- a/cmd/promtool/backfill_test.go
+++ b/cmd/promtool/backfill_test.go
@@ -45,7 +45,7 @@ func sortSamples(samples []backfillSample) {
})
}
-func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample {
+func queryAllSeries(t testing.TB, q storage.Querier, _, _ int64) []backfillSample {
ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
samples := []backfillSample{}
for ss.Next() {
@@ -86,12 +86,14 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, exp
}
func TestBackfill(t *testing.T) {
+ t.Parallel()
tests := []struct {
ToParse string
IsOk bool
Description string
MaxSamplesInAppender int
MaxBlockDuration time.Duration
+ Labels map[string]string
Expected struct {
MinTime int64
MaxTime int64
@@ -636,6 +638,49 @@ http_requests_total{code="400"} 1024 7199
},
},
},
+ {
+ ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1 1624463088.000
+http_requests_total{code="200"} 2 1629503088.000
+http_requests_total{code="200"} 3 1629863088.000
+# EOF
+`,
+ IsOk: true,
+ Description: "Sample with external labels.",
+ MaxSamplesInAppender: 5000,
+ MaxBlockDuration: 2048 * time.Hour,
+ Labels: map[string]string{"cluster_id": "123", "org_id": "999"},
+ Expected: struct {
+ MinTime int64
+ MaxTime int64
+ NumBlocks int
+ BlockDuration int64
+ Samples []backfillSample
+ }{
+ MinTime: 1624463088000,
+ MaxTime: 1629863088000,
+ NumBlocks: 2,
+ BlockDuration: int64(1458 * time.Hour / time.Millisecond),
+ Samples: []backfillSample{
+ {
+ Timestamp: 1624463088000,
+ Value: 1,
+ Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+ },
+ {
+ Timestamp: 1629503088000,
+ Value: 2,
+ Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+ },
+ {
+ Timestamp: 1629863088000,
+ Value: 3,
+ Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+ },
+ },
+ },
+ },
{
ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
# TYPE rpc_duration_seconds summary
@@ -685,11 +730,12 @@ after_eof 1 2
}
for _, test := range tests {
t.Run(test.Description, func(t *testing.T) {
+ t.Parallel()
t.Logf("Test:%s", test.Description)
outputDir := t.TempDir()
- err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
+ err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration, test.Labels)
if !test.IsOk {
require.Error(t, err, test.Description)
diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index e1d275e97e..c6a5801d28 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -32,20 +32,18 @@ import (
"time"
"github.com/alecthomas/kingpin/v2"
- "github.com/go-kit/log"
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
- config_util "github.com/prometheus/common/config"
- "github.com/prometheus/common/model"
- "github.com/prometheus/common/version"
- "github.com/prometheus/exporter-toolkit/web"
- "gopkg.in/yaml.v2"
-
dto "github.com/prometheus/client_model/go"
promconfig "github.com/prometheus/common/config"
"github.com/prometheus/common/expfmt"
+ "github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
+ "github.com/prometheus/common/version"
+ "github.com/prometheus/exporter-toolkit/web"
+ "gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@@ -58,24 +56,38 @@ import (
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
+ "github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/util/documentcli"
)
+var promqlEnableDelayedNameRemoval = false
+
+func init() {
+ // This can be removed when the legacy global mode is fully deprecated.
+ //nolint:staticcheck
+ model.NameValidationScheme = model.UTF8Validation
+}
+
const (
successExitCode = 0
failureExitCode = 1
// Exit code 3 is used for "one or more lint issues detected".
lintErrExitCode = 3
- lintOptionAll = "all"
- lintOptionDuplicateRules = "duplicate-rules"
- lintOptionNone = "none"
- checkHealth = "/-/healthy"
- checkReadiness = "/-/ready"
+ lintOptionAll = "all"
+ lintOptionDuplicateRules = "duplicate-rules"
+ lintOptionTooLongScrapeInterval = "too-long-scrape-interval"
+ lintOptionNone = "none"
+ checkHealth = "/-/healthy"
+ checkReadiness = "/-/ready"
)
-var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
+var (
+ lintRulesOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
+ // Same as lintRulesOptions, but including scrape config linting options as well.
+ lintConfigOptions = append(append([]string{}, lintRulesOptions...), lintOptionTooLongScrapeInterval)
+)
func main() {
var (
@@ -92,6 +104,10 @@ func main() {
app.HelpFlag.Short('h')
checkCmd := app.Command("check", "Check the resources for validity.")
+ checkLookbackDelta := checkCmd.Flag(
+ "query.lookback-delta",
+ "The server's maximum query lookback duration.",
+ ).Default("5m").Duration()
experimental := app.Flag("experimental", "Enable experimental commands.").Bool()
@@ -108,11 +124,12 @@ func main() {
checkConfigSyntaxOnly := checkConfigCmd.Flag("syntax-only", "Only check the config file syntax, ignoring file and content validation referenced in the config").Bool()
checkConfigLint := checkConfigCmd.Flag(
"lint",
- "Linting checks to apply to the rules specified in the config. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
+ "Linting checks to apply to the rules/scrape configs specified in the config. Available options are: "+strings.Join(lintConfigOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkConfigLintFatal := checkConfigCmd.Flag(
"lint-fatal",
"Make lint errors exit with exit code 3.").Default("false").Bool()
+ checkConfigIgnoreUnknownFields := checkConfigCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
checkWebConfigCmd := checkCmd.Command("web-config", "Check if the web config files are valid or not.")
webConfigFiles := checkWebConfigCmd.Arg(
@@ -135,11 +152,12 @@ func main() {
).ExistingFiles()
checkRulesLint := checkRulesCmd.Flag(
"lint",
- "Linting checks to apply. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
+ "Linting checks to apply. Available options are: "+strings.Join(lintRulesOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkRulesLintFatal := checkRulesCmd.Flag(
"lint-fatal",
"Make lint errors exit with exit code 3.").Default("false").Bool()
+ checkRulesIgnoreUnknownFields := checkRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
@@ -204,13 +222,16 @@ func main() {
pushMetricsHeaders := pushMetricsCmd.Flag("header", "Prometheus remote write header.").StringMap()
testCmd := app.Command("test", "Unit testing.")
+ junitOutFile := testCmd.Flag("junit", "File path to store JUnit XML test results.").OpenFile(os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
"test-rule-file",
"The unit test file.",
).Required().ExistingFiles()
+ testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool()
testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool()
+ testRulesIgnoreUnknownFields := testRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
defaultDBPath := "data/"
tsdbCmd := app.Command("tsdb", "Run tsdb commands.")
@@ -235,16 +256,16 @@ func main() {
tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.")
dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
- dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
- dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
- dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
+ dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created; this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
+ dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
+ dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.")
dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
- dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
- dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
- dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
+ dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created; this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
+ dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
+ dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.")
@@ -252,6 +273,7 @@ func main() {
importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("").Duration()
openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
+ openMetricsLabels := openMetricsImportCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value").StringMap()
importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@@ -284,7 +306,7 @@ func main() {
promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String()
- featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings()
+ featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Valid options: promql-experimental-functions, promql-delayed-name-removal. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details").Default("").Strings()
documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden()
@@ -303,40 +325,39 @@ func main() {
kingpin.Fatalf("Cannot set base auth in the server URL and use a http.config.file at the same time")
}
var err error
- httpConfig, _, err := config_util.LoadHTTPConfigFile(httpConfigFilePath)
+ httpConfig, _, err := promconfig.LoadHTTPConfigFile(httpConfigFilePath)
if err != nil {
kingpin.Fatalf("Failed to load HTTP config file: %v", err)
}
- httpRoundTripper, err = promconfig.NewRoundTripperFromConfig(*httpConfig, "promtool", config_util.WithUserAgent("promtool/"+version.Version))
+ httpRoundTripper, err = promconfig.NewRoundTripperFromConfig(*httpConfig, "promtool", promconfig.WithUserAgent(version.ComponentUserAgent("promtool")))
if err != nil {
kingpin.Fatalf("Failed to create a new HTTP round tripper: %v", err)
}
}
- var noDefaultScrapePort bool
for _, f := range *featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
- case "no-default-scrape-port":
- noDefaultScrapePort = true
+ case "promql-experimental-functions":
+ parser.EnableExperimentalFunctions = true
+ case "promql-delayed-name-removal":
+ promqlEnableDelayedNameRemoval = true
case "":
continue
- case "promql-at-modifier", "promql-negative-offset":
- fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o)
default:
- fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o)
+ fmt.Printf(" WARNING: Unknown feature passed to --enable-feature: %s\n", o)
}
}
}
switch parsedCmd {
case sdCheckCmd.FullCommand():
- os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
+ os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
- os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
+ os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkConfigIgnoreUnknownFields, model.Duration(*checkLookbackDelta)), *configFiles...))
case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper)))
@@ -348,7 +369,7 @@ func main() {
os.Exit(CheckWebConfig(*webConfigFiles...))
case checkRulesCmd.FullCommand():
- os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...))
+ os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields), *ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended))
@@ -378,13 +399,20 @@ func main() {
os.Exit(QueryLabels(serverURL, httpRoundTripper, *queryLabelsMatch, *queryLabelsName, *queryLabelsBegin, *queryLabelsEnd, p))
case testRulesCmd.FullCommand():
- os.Exit(RulesUnitTest(
+ results := io.Discard
+ if *junitOutFile != nil {
+ results = *junitOutFile
+ }
+ os.Exit(RulesUnitTestResult(results,
promqltest.LazyLoaderOpts{
- EnableAtModifier: true,
- EnableNegativeOffset: true,
+ EnableAtModifier: true,
+ EnableNegativeOffset: true,
+ EnableDelayedNameRemoval: promqlEnableDelayedNameRemoval,
},
*testRulesRun,
*testRulesDiff,
+ *testRulesDebug,
+ *testRulesIgnoreUnknownFields,
*testRulesFiles...),
)
@@ -403,7 +431,7 @@ func main() {
os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
// TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
- os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
+ os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels))
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
@@ -435,18 +463,20 @@ func checkExperimental(f bool) {
}
}
-var errLint = fmt.Errorf("lint error")
+var errLint = errors.New("lint error")
-type lintConfig struct {
- all bool
- duplicateRules bool
- fatal bool
+type rulesLintConfig struct {
+ all bool
+ duplicateRules bool
+ fatal bool
+ ignoreUnknownFields bool
}
-func newLintConfig(stringVal string, fatal bool) lintConfig {
+func newRulesLintConfig(stringVal string, fatal, ignoreUnknownFields bool) rulesLintConfig {
items := strings.Split(stringVal, ",")
- ls := lintConfig{
- fatal: fatal,
+ ls := rulesLintConfig{
+ fatal: fatal,
+ ignoreUnknownFields: ignoreUnknownFields,
}
for _, setting := range items {
switch setting {
@@ -456,17 +486,58 @@ func newLintConfig(stringVal string, fatal bool) lintConfig {
ls.duplicateRules = true
case lintOptionNone:
default:
- fmt.Printf("WARNING: unknown lint option %s\n", setting)
+ fmt.Printf("WARNING: unknown lint option: %q\n", setting)
}
}
return ls
}
-func (ls lintConfig) lintDuplicateRules() bool {
+func (ls rulesLintConfig) lintDuplicateRules() bool {
return ls.all || ls.duplicateRules
}
-// Check server status - healthy & ready.
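+// configLintConfig holds the lint settings for "check config": the rule linting
+// options plus the lookback delta used to flag too-long scrape intervals.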
+type configLintConfig struct {
+ rulesLintConfig
+
+ lookbackDelta model.Duration
+}
+
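+// newConfigLintConfig parses the --lint options for "check config", enabling the
+// scrape-interval check for "all" and "too-long-scrape-interval" and passing the
+// remaining options through to the rule lint configuration.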
+func newConfigLintConfig(optionsStr string, fatal, ignoreUnknownFields bool, lookbackDelta model.Duration) configLintConfig {
+ c := configLintConfig{
+ rulesLintConfig: rulesLintConfig{
+ fatal: fatal,
+ },
+ }
+
+ lintNone := false
+ var rulesOptions []string
+ for _, option := range strings.Split(optionsStr, ",") {
+ switch option {
+ case lintOptionAll, lintOptionTooLongScrapeInterval:
+ c.lookbackDelta = lookbackDelta
+ if option == lintOptionAll {
+ rulesOptions = append(rulesOptions, lintOptionAll)
+ }
+ case lintOptionNone:
+ lintNone = true
+ default:
+ rulesOptions = append(rulesOptions, option)
+ }
+ }
+
+ if lintNone {
+ c.lookbackDelta = 0
+ rulesOptions = nil
+ }
+
+ if len(rulesOptions) > 0 {
+ c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal, ignoreUnknownFields)
+ }
+
+ return c
+}
+
+// CheckServerStatus checks the server by querying the given health or readiness endpoint.
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
if serverURL.Scheme == "" {
serverURL.Scheme = "http"
@@ -504,12 +575,12 @@ func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper ht
}
// CheckConfig validates configuration files.
-func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
+func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, files ...string) int {
failed := false
hasErrors := false
for _, f := range files {
- ruleFiles, err := checkConfig(agentMode, f, checkSyntaxOnly)
+ ruleFiles, scrapeConfigs, err := checkConfig(agentMode, f, checkSyntaxOnly)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
hasErrors = true
@@ -522,12 +593,12 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files
}
fmt.Println()
- rulesFailed, rulesHasErrors := checkRules(ruleFiles, lintSettings)
- if rulesFailed {
- failed = rulesFailed
- }
- if rulesHasErrors {
- hasErrors = rulesHasErrors
+ if !checkSyntaxOnly {
+ scrapeConfigsFailed := lintScrapeConfigs(scrapeConfigs, lintSettings)
+ failed = failed || scrapeConfigsFailed
+ rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig)
+ failed = failed || rulesFailed
+ hasErrors = hasErrors || rulesHaveErrors
}
}
if failed && hasErrors {
@@ -566,12 +637,12 @@ func checkFileExists(fn string) error {
return err
}
-func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) {
+func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, []*config.ScrapeConfig, error) {
fmt.Println("Checking", filename)
- cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger())
+ cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger())
if err != nil {
- return nil, err
+ return nil, nil, err
}
var ruleFiles []string
@@ -579,15 +650,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, rf := range cfg.RuleFiles {
rfs, err := filepath.Glob(rf)
if err != nil {
- return nil, err
+ return nil, nil, err
}
// If an explicit file was given, error if it is not accessible.
if !strings.Contains(rf, "*") {
if len(rfs) == 0 {
- return nil, fmt.Errorf("%q does not point to an existing file", rf)
+ return nil, nil, fmt.Errorf("%q does not point to an existing file", rf)
}
if err := checkFileExists(rfs[0]); err != nil {
- return nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
+ return nil, nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
}
}
ruleFiles = append(ruleFiles, rfs...)
@@ -601,26 +672,26 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
var err error
scfgs, err = cfg.GetScrapeConfigs()
if err != nil {
- return nil, fmt.Errorf("error loading scrape configs: %w", err)
+ return nil, nil, fmt.Errorf("error loading scrape configs: %w", err)
}
}
for _, scfg := range scfgs {
if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil {
if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil {
- return nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
+ return nil, nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
}
}
if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
- return nil, err
+ return nil, nil, err
}
for _, c := range scfg.ServiceDiscoveryConfigs {
switch c := c.(type) {
case *kubernetes.SDConfig:
if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
- return nil, err
+ return nil, nil, err
}
case *file.SDConfig:
if checkSyntaxOnly {
@@ -629,17 +700,17 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
- return nil, err
+ return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
- return nil, fmt.Errorf("checking SD file %q: %w", file, err)
+ return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForScrapeConfig(targetGroups, scfg); err != nil {
- return nil, err
+ return nil, nil, err
}
}
continue
@@ -648,7 +719,7 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForScrapeConfig(c, scfg); err != nil {
- return nil, err
+ return nil, nil, err
}
}
}
@@ -665,18 +736,18 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
- return nil, err
+ return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
- return nil, fmt.Errorf("checking SD file %q: %w", file, err)
+ return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForAlertmanager(targetGroups, amcfg); err != nil {
- return nil, err
+ return nil, nil, err
}
}
continue
@@ -685,15 +756,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForAlertmanager(c, amcfg); err != nil {
- return nil, err
+ return nil, nil, err
}
}
}
}
- return ruleFiles, nil
+ return ruleFiles, scfgs, nil
}
-func checkTLSConfig(tlsConfig config_util.TLSConfig, checkSyntaxOnly bool) error {
+func checkTLSConfig(tlsConfig promconfig.TLSConfig, checkSyntaxOnly bool) error {
if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 {
return fmt.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
}
@@ -752,7 +823,7 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) {
}
// CheckRules validates rule files.
-func CheckRules(ls lintConfig, files ...string) int {
+func CheckRules(ls rulesLintConfig, files ...string) int {
failed := false
hasErrors := false
if len(files) == 0 {
@@ -772,7 +843,7 @@ func CheckRules(ls lintConfig, files ...string) int {
}
// checkRulesFromStdin validates rule from stdin.
-func checkRulesFromStdin(ls lintConfig) (bool, bool) {
+func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
fmt.Println("Checking standard input")
@@ -781,7 +852,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return true, true
}
- rgs, errs := rulefmt.Parse(data)
+ rgs, errs := rulefmt.Parse(data, ls.ignoreUnknownFields)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@@ -810,12 +881,12 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
}
// checkRules validates rule files.
-func checkRules(files []string, ls lintConfig) (bool, bool) {
+func checkRules(files []string, ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
for _, f := range files {
fmt.Println("Checking", f)
- rgs, errs := rulefmt.ParseFile(f)
+ rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@@ -844,7 +915,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
return failed, hasErrors
}
-func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []error) {
+func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings rulesLintConfig) (int, []error) {
numRules := 0
for _, rg := range rgs.Groups {
numRules += len(rg.Rules)
@@ -868,6 +939,16 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e
return numRules, nil
}
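+// lintScrapeConfigs reports whether any scrape config uses a scrape interval that is
+// at least as long as the lookback delta, which would cause data points to be marked
+// stale between scrapes.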
+func lintScrapeConfigs(scrapeConfigs []*config.ScrapeConfig, lintSettings configLintConfig) bool {
+ for _, scfg := range scrapeConfigs {
+ if lintSettings.lookbackDelta > 0 && scfg.ScrapeInterval >= lintSettings.lookbackDelta {
+ fmt.Fprintf(os.Stderr, " FAILED: too long scrape interval found, data point will be marked as stale - job: %s, interval: %s\n", scfg.JobName, scfg.ScrapeInterval)
+ return true
+ }
+ }
+ return false
+}
+
type compareRuleType struct {
metric string
label labels.Labels
@@ -889,40 +970,40 @@ func compare(a, b compareRuleType) int {
func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType {
var duplicates []compareRuleType
- var rules compareRuleTypes
+ var cRules compareRuleTypes
for _, group := range groups {
for _, rule := range group.Rules {
- rules = append(rules, compareRuleType{
+ cRules = append(cRules, compareRuleType{
metric: ruleMetric(rule),
- label: labels.FromMap(rule.Labels),
+ label: rules.FromMaps(group.Labels, rule.Labels),
})
}
}
- if len(rules) < 2 {
+ if len(cRules) < 2 {
return duplicates
}
- sort.Sort(rules)
+ sort.Sort(cRules)
- last := rules[0]
- for i := 1; i < len(rules); i++ {
- if compare(last, rules[i]) == 0 {
+ last := cRules[0]
+ for i := 1; i < len(cRules); i++ {
+ if compare(last, cRules[i]) == 0 {
// Don't add a duplicated rule multiple times.
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 {
- duplicates = append(duplicates, rules[i])
+ duplicates = append(duplicates, cRules[i])
}
}
- last = rules[i]
+ last = cRules[i]
}
return duplicates
}
-func ruleMetric(rule rulefmt.RuleNode) string {
- if rule.Alert.Value != "" {
- return rule.Alert.Value
+func ruleMetric(rule rulefmt.Rule) string {
+ if rule.Alert != "" {
+ return rule.Alert
}
- return rule.Record.Value
+ return rule.Record
}
var checkMetricsUsage = strings.TrimSpace(`
@@ -1176,7 +1257,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu
return fmt.Errorf("new api client error: %w", err)
}
- ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api)
+ ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api)
errs := ruleImporter.loadGroups(ctx, files)
for _, err := range errs {
if err != nil {
@@ -1210,7 +1291,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c
lb := labels.NewBuilder(labels.EmptyLabels())
for _, tg := range targetGroups {
var failures []error
- targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb)
+ targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb)
if len(failures) > 0 {
first := failures[0]
return first
@@ -1250,7 +1331,7 @@ func labelsSetPromQL(query, labelMatchType, name, value string) error {
return fmt.Errorf("invalid label match type: %s", labelMatchType)
}
- parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
+ parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
var found bool
for i, l := range n.LabelMatchers {
@@ -1281,7 +1362,7 @@ func labelsDeletePromQL(query, name string) error {
return err
}
- parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
+ parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
for i, l := range n.LabelMatchers {
if l.Name == name {
diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go
index 78500fe937..f922d18c4e 100644
--- a/cmd/promtool/main_test.go
+++ b/cmd/promtool/main_test.go
@@ -31,12 +31,20 @@ import (
"testing"
"time"
+ "github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
+ "github.com/prometheus/prometheus/promql/promqltest"
)
+func init() {
+ // This can be removed when the legacy global mode is fully deprecated.
+ //nolint:staticcheck
+ model.NameValidationScheme = model.UTF8Validation
+}
+
var promtoolPath = os.Args[0]
func TestMain(m *testing.M) {
@@ -53,6 +61,7 @@ func TestMain(m *testing.M) {
}
func TestQueryRange(t *testing.T) {
+ t.Parallel()
s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
defer s.Close()
@@ -76,6 +85,7 @@ func TestQueryRange(t *testing.T) {
}
func TestQueryInstant(t *testing.T) {
+ t.Parallel()
s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "vector", "result": []}}`)
defer s.Close()
@@ -107,6 +117,7 @@ func mockServer(code int, body string) (*httptest.Server, func() *http.Request)
}
func TestCheckSDFile(t *testing.T) {
+ t.Parallel()
cases := []struct {
name string
file string
@@ -126,8 +137,8 @@ func TestCheckSDFile(t *testing.T) {
},
{
name: "bad file extension",
- file: "./testdata/bad-sd-file-extension.nonexistant",
- err: "invalid file extension: \".nonexistant\"",
+ file: "./testdata/bad-sd-file-extension.nonexistent",
+ err: "invalid file extension: \".nonexistent\"",
},
{
name: "bad format",
@@ -137,9 +148,10 @@ func TestCheckSDFile(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
+ t.Parallel()
_, err := checkSDFile(test.file)
if test.err != "" {
- require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
+ require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@@ -148,6 +160,7 @@ func TestCheckSDFile(t *testing.T) {
}
func TestCheckDuplicates(t *testing.T) {
+ t.Parallel()
cases := []struct {
name string
ruleFile string
@@ -172,7 +185,8 @@ func TestCheckDuplicates(t *testing.T) {
for _, test := range cases {
c := test
t.Run(c.name, func(t *testing.T) {
- rgs, err := rulefmt.ParseFile(c.ruleFile)
+ t.Parallel()
+ rgs, err := rulefmt.ParseFile(c.ruleFile, false)
require.Empty(t, err)
dups := checkDuplicates(rgs.Groups)
require.Equal(t, c.expectedDups, dups)
@@ -181,7 +195,7 @@ func TestCheckDuplicates(t *testing.T) {
}
func BenchmarkCheckDuplicates(b *testing.B) {
- rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml")
+ rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml", false)
require.Empty(b, err)
b.ResetTimer()
@@ -191,6 +205,7 @@ func BenchmarkCheckDuplicates(b *testing.B) {
}
func TestCheckTargetConfig(t *testing.T) {
+ t.Parallel()
cases := []struct {
name string
file string
@@ -219,9 +234,10 @@ func TestCheckTargetConfig(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
- _, err := checkConfig(false, "testdata/"+test.file, false)
+ t.Parallel()
+ _, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
- require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
+ require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@@ -230,6 +246,7 @@ func TestCheckTargetConfig(t *testing.T) {
}
func TestCheckConfigSyntax(t *testing.T) {
+ t.Parallel()
cases := []struct {
name string
file string
@@ -302,13 +319,14 @@ func TestCheckConfigSyntax(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
- _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
+ t.Parallel()
+ _, _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
expectedErrMsg := test.err
if strings.Contains(runtime.GOOS, "windows") {
expectedErrMsg = test.errWindows
}
if expectedErrMsg != "" {
- require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error())
+ require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@@ -317,6 +335,7 @@ func TestCheckConfigSyntax(t *testing.T) {
}
func TestAuthorizationConfig(t *testing.T) {
+ t.Parallel()
cases := []struct {
name string
file string
@@ -336,9 +355,10 @@ func TestAuthorizationConfig(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
- _, err := checkConfig(false, "testdata/"+test.file, false)
+ t.Parallel()
+ _, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
- require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error())
+ require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@@ -350,6 +370,7 @@ func TestCheckMetricsExtended(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Skipping on windows")
}
+ t.Parallel()
f, err := os.Open("testdata/metrics-test.prom")
require.NoError(t, err)
@@ -386,6 +407,7 @@ func TestExitCodes(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
+ t.Parallel()
for _, c := range []struct {
file string
@@ -410,8 +432,10 @@ func TestExitCodes(t *testing.T) {
},
} {
t.Run(c.file, func(t *testing.T) {
+ t.Parallel()
for _, lintFatal := range []bool{true, false} {
t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) {
+ t.Parallel()
args := []string{"-test.main", "check", "config", "testdata/" + c.file}
if lintFatal {
args = append(args, "--lint-fatal")
@@ -442,6 +466,7 @@ func TestDocumentation(t *testing.T) {
if runtime.GOOS == "windows" {
t.SkipNow()
}
+ t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
@@ -484,8 +509,8 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
- require.Equal(t, successExitCode, exitCode, "")
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false))
+ require.Equal(t, successExitCode, exitCode)
})
t.Run("rules-bad", func(t *testing.T) {
@@ -506,8 +531,8 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
- require.Equal(t, failureExitCode, exitCode, "")
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false))
+ require.Equal(t, failureExitCode, exitCode)
})
t.Run("rules-lint-fatal", func(t *testing.T) {
@@ -528,24 +553,125 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true))
- require.Equal(t, lintErrExitCode, exitCode, "")
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false))
+ require.Equal(t, lintErrExitCode, exitCode)
})
}
+func TestCheckRulesWithFeatureFlag(t *testing.T) {
+ // Unlike TestCheckRules, which calls CheckRules directly, we run promtool here
+ // so that feature flag parsing can be tested.
+
+ args := []string{"-test.main", "--enable-feature=promql-experimental-functions", "check", "rules", "testdata/features.yml"}
+ tool := exec.Command(promtoolPath, args...)
+ err := tool.Run()
+ require.NoError(t, err)
+}
+
func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml")
- require.Equal(t, successExitCode, exitCode, "")
+ t.Parallel()
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules.yml")
+ require.Equal(t, successExitCode, exitCode)
})
t.Run("rules-bad", func(t *testing.T) {
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml")
- require.Equal(t, failureExitCode, exitCode, "")
+ t.Parallel()
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules-bad.yml")
+ require.Equal(t, failureExitCode, exitCode)
})
t.Run("rules-lint-fatal", func(t *testing.T) {
- exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml")
- require.Equal(t, lintErrExitCode, exitCode, "")
+ t.Parallel()
+ exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false), "./testdata/prometheus-rules.lint.yml")
+ require.Equal(t, lintErrExitCode, exitCode)
})
}
+
+func TestCheckScrapeConfigs(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ lookbackDelta model.Duration
+ expectError bool
+ }{
+ {
+ name: "scrape interval less than lookback delta",
+ lookbackDelta: model.Duration(11 * time.Minute),
+ expectError: false,
+ },
+ {
+ name: "scrape interval greater than lookback delta",
+ lookbackDelta: model.Duration(5 * time.Minute),
+ expectError: true,
+ },
+ {
+ name: "scrape interval same as lookback delta",
+ lookbackDelta: model.Duration(10 * time.Minute),
+ expectError: true,
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ // Non-fatal linting.
+ code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
+ require.Equal(t, successExitCode, code, "Non-fatal linting should return success")
+ // Fatal linting.
+ code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
+ if tc.expectError {
+ require.Equal(t, lintErrExitCode, code, "Fatal linting should return error")
+ } else {
+ require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems")
+ }
+ // Check syntax only, no linting.
+ code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
+ require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only")
+ // Lint option "none" should disable linting.
+ code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
+ require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`)
+ })
+ }
+}
+
+func TestTSDBDumpCommand(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping test in short mode.")
+ }
+ t.Parallel()
+
+ storage := promqltest.LoadedStorage(t, `
+ load 1m
+ metric{foo="bar"} 1 2 3
+ `)
+ t.Cleanup(func() { storage.Close() })
+
+ for _, c := range []struct {
+ name string
+ subCmd string
+ sandboxDirRoot string
+ }{
+ {
+ name: "dump",
+ subCmd: "dump",
+ },
+ {
+ name: "dump with sandbox dir root",
+ subCmd: "dump",
+ sandboxDirRoot: t.TempDir(),
+ },
+ {
+ name: "dump-openmetrics",
+ subCmd: "dump-openmetrics",
+ },
+ {
+ name: "dump-openmetrics with sandbox dir root",
+ subCmd: "dump-openmetrics",
+ sandboxDirRoot: t.TempDir(),
+ },
+ } {
+ t.Run(c.name, func(t *testing.T) {
+ t.Parallel()
+ args := []string{"-test.main", "tsdb", c.subCmd, storage.Dir()}
+ cmd := exec.Command(promtoolPath, args...)
+ require.NoError(t, cmd.Run())
+ })
+ }
+}
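Note: several of the new tests above (TestCheckRulesWithFeatureFlag, TestTSDBDumpCommand) exercise promtool by re-running the test binary with -test.main instead of calling the Go functions directly, so flag parsing is covered as well. A minimal sketch of that pattern, assuming promtoolPath is os.Args[0] as in main_test.go; the helper name below is hypothetical:

package main

import (
	"fmt"
	"os"
	"os/exec"
)

// runPromtool re-executes the current binary with -test.main prepended,
// which the test's TestMain interprets as "behave like the real promtool CLI".
func runPromtool(promtoolPath string, args ...string) error {
	cmd := exec.Command(promtoolPath, append([]string{"-test.main"}, args...)...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

func main() {
	promtoolPath := os.Args[0]
	err := runPromtool(promtoolPath, "--enable-feature=promql-experimental-functions", "check", "rules", "testdata/features.yml")
	fmt.Println("exit error:", err)
}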
diff --git a/cmd/promtool/metrics.go b/cmd/promtool/metrics.go
index 46246b672a..56b5209541 100644
--- a/cmd/promtool/metrics.go
+++ b/cmd/promtool/metrics.go
@@ -23,15 +23,15 @@ import (
"os"
"time"
- "github.com/golang/snappy"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/storage/remote"
+ "github.com/prometheus/prometheus/util/compression"
"github.com/prometheus/prometheus/util/fmtutil"
)
-// Push metrics to a prometheus remote write (for testing purpose only).
+// PushMetrics pushes metrics to a Prometheus remote write endpoint (for testing purposes only).
func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
addressURL, err := url.Parse(url.String())
if err != nil {
@@ -101,6 +101,7 @@ func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[strin
return successExitCode
}
+// TODO(bwplotka): Add PRW 2.0 support.
func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]string) bool {
metricsData, err := fmtutil.MetricTextToWriteRequest(bytes.NewReader(data), labels)
if err != nil {
@@ -115,8 +116,13 @@ func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]s
}
// Encode the request body into snappy encoding.
- compressed := snappy.Encode(nil, raw)
- err = client.Store(context.Background(), compressed, 0)
+ compressed, err := compression.Encode(compression.Snappy, raw, nil)
+ if err != nil {
+ fmt.Fprintln(os.Stderr, " FAILED:", err)
+ return false
+ }
+
+ _, err = client.Store(context.Background(), compressed, 0)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false
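Note: the metrics.go change above replaces the direct snappy.Encode call with the shared compression helper and adapts to client.Store now returning a response value alongside the error. A minimal sketch of the resulting encode-then-store flow, using only calls visible in this hunk; the wrapper function and its error messages are illustrative:

package metricspush

import (
	"context"
	"fmt"

	"github.com/prometheus/prometheus/storage/remote"
	"github.com/prometheus/prometheus/util/compression"
)

// storeCompressed snappy-compresses a serialized remote-write payload and
// pushes it, mirroring what parseAndPushMetrics does after this patch.
func storeCompressed(ctx context.Context, client *remote.Client, raw []byte) error {
	compressed, err := compression.Encode(compression.Snappy, raw, nil)
	if err != nil {
		return fmt.Errorf("compress payload: %w", err)
	}
	// Store now also returns the write response; it is ignored here.
	if _, err := client.Store(ctx, compressed, 0); err != nil {
		return fmt.Errorf("remote write: %w", err)
	}
	return nil
}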
diff --git a/cmd/promtool/rules.go b/cmd/promtool/rules.go
index 5a18644842..b2eb18ca8e 100644
--- a/cmd/promtool/rules.go
+++ b/cmd/promtool/rules.go
@@ -16,12 +16,12 @@ package main
import (
"context"
"fmt"
+ "log/slog"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
@@ -38,7 +38,7 @@ type queryRangeAPI interface {
}
type ruleImporter struct {
- logger log.Logger
+ logger *slog.Logger
config ruleImporterConfig
apiClient queryRangeAPI
@@ -57,8 +57,8 @@ type ruleImporterConfig struct {
// newRuleImporter creates a new rule importer that can be used to parse and evaluate recording rule files and create new series
// written to disk in blocks.
-func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
- level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
+func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
+ logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
return &ruleImporter{
logger: logger,
config: config,
@@ -69,7 +69,7 @@ func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient que
// loadGroups parses groups from a list of recording rule files.
func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) (errs []error) {
- groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, filenames...)
+ groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, false, filenames...)
if errs != nil {
return errs
}
@@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string)
// importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks.
func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) {
for name, group := range importer.groups {
- level.Info(importer.logger).Log("backfiller", "processing group", "name", name)
+ importer.logger.Info("processing group", "component", "backfiller", "name", name)
for i, r := range group.Rules() {
- level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name())
+ importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name())
if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil {
errs = append(errs, err)
}
@@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
return fmt.Errorf("query range: %w", err)
}
if warnings != nil {
- level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings)
+ importer.logger.Warn("Range query returned warnings.", "warnings", warnings)
}
// To prevent races with compaction, a block writer only allows appending samples
@@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add sample in the
// original interval later.
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
+ w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("new block writer: %w", err)
}
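Note: rules.go above is part of the wider migration from go-kit/log to log/slog: loggers are now *slog.Logger values built via promslog, and fields are passed as alternating key/value arguments instead of level.Info(...).Log(...). A minimal sketch of the new style, assuming only the promslog constructors used in this diff:

package main

import "github.com/prometheus/common/promslog"

func main() {
	// Structured *slog.Logger with promslog's default settings.
	logger := promslog.New(&promslog.Config{})
	logger.Info("new rule importer", "component", "backfiller", "start", "01 Jan 25 00:00 UTC")
	logger.Warn("Range query returned warnings.", "warnings", []string{"example"})

	// No-op logger, handy for tests and for the block writer above.
	nop := promslog.NewNopLogger()
	nop.Info("this is discarded")
}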
diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go
index d55fb0c896..3cb47aa8af 100644
--- a/cmd/promtool/rules_test.go
+++ b/cmd/promtool/rules_test.go
@@ -21,9 +21,9 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
@@ -35,7 +35,7 @@ type mockQueryRangeAPI struct {
samples model.Matrix
}
-func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) {
+func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, _ string, _ v1.Range, _ ...v1.Option) (model.Value, v1.Warnings, error) {
return mockAPI.samples, v1.Warnings{}, nil
}
@@ -43,6 +43,7 @@ const defaultBlockDuration = time.Duration(tsdb.DefaultBlockDuration) * time.Mil
// TestBackfillRuleIntegration is an integration test that runs all the rule importer code to confirm the parts work together.
func TestBackfillRuleIntegration(t *testing.T) {
+ t.Parallel()
const (
testMaxSampleCount = 50
testValue = 123
@@ -72,6 +73,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
tmpDir := t.TempDir()
ctx := context.Background()
@@ -161,7 +163,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
}
func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) {
- logger := log.NewNopLogger()
+ logger := promslog.NewNopLogger()
cfg := ruleImporterConfig{
outputDir: tmpDir,
start: start.Add(-10 * time.Hour),
@@ -210,6 +212,7 @@ func createMultiRuleTestFiles(path string) error {
// TestBackfillLabels confirms that the labels in the rule file override the labels from the metrics
// received from Prometheus Query API, including the __name__ label.
func TestBackfillLabels(t *testing.T) {
+ t.Parallel()
tmpDir := t.TempDir()
ctx := context.Background()
@@ -251,6 +254,7 @@ func TestBackfillLabels(t *testing.T) {
require.NoError(t, err)
t.Run("correct-labels", func(t *testing.T) {
+ t.Parallel()
selectedSeries := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
for selectedSeries.Next() {
series := selectedSeries.At()
diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go
index e65262d439..884864205c 100644
--- a/cmd/promtool/sd.go
+++ b/cmd/promtool/sd.go
@@ -20,9 +20,9 @@ import (
"os"
"time"
- "github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@@ -38,10 +38,10 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
-func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
- logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
+func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, _ prometheus.Registerer) int {
+ logger := promslog.New(&promslog.Config{})
- cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
+ cfg, err := config.LoadFile(sdConfigFiles, false, logger)
if err != nil {
fmt.Fprintln(os.Stderr, "Cannot load config", err)
return failureExitCode
@@ -114,7 +114,7 @@ outerLoop:
}
results := []sdCheckResult{}
for _, tgs := range sdCheckResults {
- results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...)
+ results = append(results, getSDCheckResult(tgs, scrapeConfig)...)
}
res, err := json.MarshalIndent(results, "", " ")
@@ -127,7 +127,7 @@ outerLoop:
return successExitCode
}
-func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult {
+func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult {
sdCheckResults := []sdCheckResult{}
lb := labels.NewBuilder(labels.EmptyLabels())
for _, targetGroup := range targetGroups {
@@ -144,7 +144,9 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc
}
}
- res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort)
+ scrape.PopulateDiscoveredLabels(lb, scrapeConfig, target, targetGroup.Labels)
+ orig := lb.Labels()
+ res, err := scrape.PopulateLabels(lb, scrapeConfig, target, targetGroup.Labels)
result := sdCheckResult{
DiscoveredLabels: orig,
Labels: res,
diff --git a/cmd/promtool/sd_test.go b/cmd/promtool/sd_test.go
index cb65ee72aa..8f174a9b80 100644
--- a/cmd/promtool/sd_test.go
+++ b/cmd/promtool/sd_test.go
@@ -18,17 +18,17 @@ import (
"time"
"github.com/prometheus/common/model"
+ "github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/util/testutil"
-
- "github.com/stretchr/testify/require"
)
func TestSDCheckResult(t *testing.T) {
+ t.Parallel()
targetGroups := []*targetgroup.Group{{
Targets: []model.LabelSet{
map[model.LabelName]model.LabelValue{"__address__": "localhost:8080", "foo": "bar"},
@@ -70,5 +70,5 @@ func TestSDCheckResult(t *testing.T) {
},
}
- testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true))
+ testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig))
}
diff --git a/cmd/promtool/testdata/bad-sd-file-extension.nonexistant b/cmd/promtool/testdata/bad-sd-file-extension.nonexistent
similarity index 100%
rename from cmd/promtool/testdata/bad-sd-file-extension.nonexistant
rename to cmd/promtool/testdata/bad-sd-file-extension.nonexistent
diff --git a/cmd/promtool/testdata/config_with_service_discovery_files.yml b/cmd/promtool/testdata/config_with_service_discovery_files.yml
index 13b6d7faff..6a550a8403 100644
--- a/cmd/promtool/testdata/config_with_service_discovery_files.yml
+++ b/cmd/promtool/testdata/config_with_service_discovery_files.yml
@@ -6,7 +6,7 @@ scrape_configs:
alerting:
alertmanagers:
- scheme: http
- api_version: v1
+ api_version: v2
file_sd_configs:
- files:
- nonexistent_file.yml
diff --git a/cmd/promtool/testdata/features.yml b/cmd/promtool/testdata/features.yml
new file mode 100644
index 0000000000..769f8362bf
--- /dev/null
+++ b/cmd/promtool/testdata/features.yml
@@ -0,0 +1,6 @@
+groups:
+ - name: features
+ rules:
+ - record: x
+ # We don't expect anything from this; we just want to check that the function parses.
+ expr: sort_by_label(up, "instance")
diff --git a/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml b/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml
new file mode 100644
index 0000000000..0c85d13f31
--- /dev/null
+++ b/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml
@@ -0,0 +1,3 @@
+scrape_configs:
+ - job_name: too_long_scrape_interval_test
+ scrape_interval: 10m
diff --git a/cmd/promtool/testdata/rules_extrafields.yml b/cmd/promtool/testdata/rules_extrafields.yml
new file mode 100644
index 0000000000..85ef079bb8
--- /dev/null
+++ b/cmd/promtool/testdata/rules_extrafields.yml
@@ -0,0 +1,33 @@
+# This is the rules file. It has an extra "ownership"
+# field in the second group. promtool should ignore this field
+# and not return an error with --ignore-unknown-fields.
+
+groups:
+ - name: alerts
+ namespace: "foobar"
+ rules:
+ - alert: InstanceDown
+ expr: up == 0
+ for: 5m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance {{ $labels.instance }} down"
+ description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+ - alert: AlwaysFiring
+ expr: 1
+
+ - name: rules
+ ownership:
+ service: "test"
+ rules:
+ - record: job:test:count_over_time1m
+ expr: sum without(instance) (count_over_time(test[1m]))
+
+ # A recording rule that doesn't depend on input series.
+ - record: fixed_data
+ expr: 1
+
+ # Subquery with default resolution test.
+ - record: suquery_interval_test
+ expr: count_over_time(up[5m:])
diff --git a/cmd/promtool/testdata/rules_run_extrafields.yml b/cmd/promtool/testdata/rules_run_extrafields.yml
new file mode 100644
index 0000000000..86879fc396
--- /dev/null
+++ b/cmd/promtool/testdata/rules_run_extrafields.yml
@@ -0,0 +1,21 @@
+# Minimal test case to see that --ignore-unknown-fields
+# is working as expected. It should not return an error
+# when any extra fields are present in the rules file.
+rule_files:
+ - rules_extrafields.yml
+
+evaluation_interval: 1m
+
+
+tests:
+ - name: extra ownership field test
+ input_series:
+ - series: test
+ values: 1
+
+ promql_expr_test:
+ - expr: test
+ eval_time: 0
+ exp_samples:
+ - value: 1
+ labels: test
diff --git a/cmd/promtool/testdata/rules_run_fuzzy.yml b/cmd/promtool/testdata/rules_run_fuzzy.yml
new file mode 100644
index 0000000000..3bf4e47a45
--- /dev/null
+++ b/cmd/promtool/testdata/rules_run_fuzzy.yml
@@ -0,0 +1,43 @@
+# Minimal test case to see that fuzzy compare is working as expected.
+# It should allow slight floating point differences through. Larger
+# floating point differences should still fail.
+
+evaluation_interval: 1m
+fuzzy_compare: true
+
+tests:
+ - name: correct fuzzy match
+ input_series:
+ - series: test_low
+ values: 2.9999999999999996
+ - series: test_high
+ values: 3.0000000000000004
+ promql_expr_test:
+ - expr: test_low
+ eval_time: 0
+ exp_samples:
+ - labels: test_low
+ value: 3
+ - expr: test_high
+ eval_time: 0
+ exp_samples:
+ - labels: test_high
+ value: 3
+
+ - name: wrong fuzzy match
+ input_series:
+ - series: test_low
+ values: 2.9999999999999987
+ - series: test_high
+ values: 3.0000000000000013
+ promql_expr_test:
+ - expr: test_low
+ eval_time: 0
+ exp_samples:
+ - labels: test_low
+ value: 3
+ - expr: test_high
+ eval_time: 0
+ exp_samples:
+ - labels: test_high
+ value: 3
diff --git a/cmd/promtool/testdata/rules_run_no_fuzzy.yml b/cmd/promtool/testdata/rules_run_no_fuzzy.yml
new file mode 100644
index 0000000000..eba201a28c
--- /dev/null
+++ b/cmd/promtool/testdata/rules_run_no_fuzzy.yml
@@ -0,0 +1,24 @@
+# Minimal test case to see that fuzzy compare can be turned off,
+# and that slight floating point differences then fail to match.
+
+evaluation_interval: 1m
+fuzzy_compare: false
+
+tests:
+ - name: correct fuzzy match
+ input_series:
+ - series: test_low
+ values: 2.9999999999999996
+ - series: test_high
+ values: 3.0000000000000004
+ promql_expr_test:
+ - expr: test_low
+ eval_time: 0
+ exp_samples:
+ - labels: test_low
+ value: 3
+ - expr: test_high
+ eval_time: 0
+ exp_samples:
+ - labels: test_high
+ value: 3
diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml
index ff511729ba..e2a8230902 100644
--- a/cmd/promtool/testdata/unittest.yml
+++ b/cmd/promtool/testdata/unittest.yml
@@ -69,13 +69,13 @@ tests:
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
- histogram: "{{count:2 sum:3 buckets:[2]}}"
+ histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
- histogram: "{{count:4 sum:5.6 buckets:[4]}}"
+ histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale
@@ -89,11 +89,11 @@ tests:
# Ensure lookback delta is respected, when a value is missing.
- expr: timestamp(test_missing)
- eval_time: 5m
+ eval_time: 4m59s
exp_samples:
- value: 0
- expr: timestamp(test_missing)
- eval_time: 5m1s
+ eval_time: 5m
exp_samples: []
# Minimal test case to check edge case of a single sample.
@@ -113,7 +113,7 @@ tests:
- expr: count_over_time(fixed_data[1h])
eval_time: 1h
exp_samples:
- - value: 61
+ - value: 60
- expr: timestamp(fixed_data)
eval_time: 1h
exp_samples:
@@ -183,7 +183,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 1m
exp_samples:
- - value: 61
+ - value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 1m10s
@@ -194,7 +194,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 2m
exp_samples:
- - value: 61
+ - value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 2m59s999ms
diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go
index 2ed7244b1c..f512728ac9 100644
--- a/cmd/promtool/tsdb.go
+++ b/cmd/promtool/tsdb.go
@@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"os"
"path/filepath"
"runtime"
@@ -32,7 +33,7 @@ import (
"time"
"github.com/alecthomas/units"
- "github.com/go-kit/log"
+ "github.com/prometheus/common/promslog"
"go.uber.org/atomic"
"github.com/prometheus/prometheus/model/labels"
@@ -60,7 +61,7 @@ type writeBenchmark struct {
memprof *os.File
blockprof *os.File
mtxprof *os.File
- logger log.Logger
+ logger *slog.Logger
}
func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error {
@@ -68,7 +69,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
outPath: outPath,
samplesFile: samplesFile,
numMetrics: numMetrics,
- logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)),
+ logger: promslog.New(&promslog.Config{}),
}
if b.outPath == "" {
dir, err := os.MkdirTemp("", "tsdb_bench")
@@ -87,9 +88,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
dir := filepath.Join(b.outPath, "storage")
- l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller)
-
- st, err := tsdb.Open(dir, l, nil, &tsdb.Options{
+ st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
MinBlockDuration: int64(2 * time.Hour / time.Millisecond),
}, tsdb.NewDBStats())
@@ -315,12 +314,11 @@ func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) {
i := 0
for scanner.Scan() && i < n {
- m := make([]labels.Label, 0, 10)
-
r := strings.NewReplacer("\"", "", "{", "", "}", "")
s := r.Replace(scanner.Text())
labelChunks := strings.Split(s, ",")
+ m := make([]labels.Label, 0, len(labelChunks))
for _, labelChunk := range labelChunks {
split := strings.Split(labelChunk, ":")
m = append(m, labels.Label{Name: split[0], Value: split[1]})
@@ -367,25 +365,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) {
fmt.Fprintf(tw,
"%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n",
meta.ULID,
- getFormatedTime(meta.MinTime, humanReadable),
- getFormatedTime(meta.MaxTime, humanReadable),
+ getFormattedTime(meta.MinTime, humanReadable),
+ getFormattedTime(meta.MaxTime, humanReadable),
time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond,
meta.Stats.NumSamples,
meta.Stats.NumChunks,
meta.Stats.NumSeries,
- getFormatedBytes(b.Size(), humanReadable),
+ getFormattedBytes(b.Size(), humanReadable),
)
}
}
-func getFormatedTime(timestamp int64, humanReadable bool) string {
+func getFormattedTime(timestamp int64, humanReadable bool) string {
if humanReadable {
return time.Unix(timestamp/1000, 0).UTC().String()
}
return strconv.FormatInt(timestamp, 10)
}
-func getFormatedBytes(bytes int64, humanReadable bool) string {
+func getFormattedBytes(bytes int64, humanReadable bool) string {
if humanReadable {
return units.Base2Bytes(bytes).String()
}
@@ -405,7 +403,7 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error)
}
}
- b, err := db.Block(blockID)
+ b, err := db.Block(blockID, tsdb.DefaultPostingsDecoderFactory)
if err != nil {
return nil, nil, err
}
@@ -554,7 +552,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
- values, err := ir.SortedLabelValues(ctx, n, selectors...)
+ values, err := ir.SortedLabelValues(ctx, n, nil, selectors...)
if err != nil {
return err
}
@@ -570,7 +568,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
- lv, err := ir.SortedLabelValues(ctx, n, selectors...)
+ lv, err := ir.SortedLabelValues(ctx, n, nil, selectors...)
if err != nil {
return err
}
@@ -580,7 +578,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
printInfo(postingInfos)
postingInfos = postingInfos[:0]
- lv, err := ir.SortedLabelValues(ctx, "__name__", selectors...)
+ lv, err := ir.SortedLabelValues(ctx, "__name__", nil, selectors...)
if err != nil {
return err
}
@@ -589,7 +587,10 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
if err != nil {
return err
}
- postings = index.Intersect(postings, index.NewListPostings(refs))
+ // Only intersect postings if matchers are specified.
+ if len(matchers) > 0 {
+ postings = index.Intersect(postings, index.NewListPostings(refs))
+ }
count := 0
for postings.Next() {
count++
@@ -662,7 +663,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
if !ok {
- return fmt.Errorf("chunk is not FloatHistogramChunk")
+ return errors.New("chunk is not FloatHistogramChunk")
}
it := fhchk.Iterator(nil)
bucketCount := 0
@@ -677,7 +678,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
hchk, ok := chk.(*chunkenc.HistogramChunk)
if !ok {
- return fmt.Errorf("chunk is not HistogramChunk")
+ return errors.New("chunk is not HistogramChunk")
}
it := hchk.Iterator(nil)
bucketCount := 0
@@ -733,7 +734,7 @@ func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt i
for _, mset := range matcherSets {
sets = append(sets, q.Select(ctx, true, nil, mset...))
}
- ss = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
+ ss = storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge)
} else {
ss = q.Select(ctx, false, nil, matcherSets[0]...)
}
@@ -823,18 +824,32 @@ func checkErr(err error) int {
return 0
}
-func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
- inputFile, err := fileutil.OpenMmapFile(path)
+func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int {
+ var buf []byte
+ info, err := os.Stat(path)
if err != nil {
return checkErr(err)
}
- defer inputFile.Close()
+ if info.Mode()&(os.ModeNamedPipe|os.ModeCharDevice) != 0 {
+ // Read the pipe chunk by chunk, as it cannot be mmap-ed.
+ buf, err = os.ReadFile(path)
+ if err != nil {
+ return checkErr(err)
+ }
+ } else {
+ inputFile, err := fileutil.OpenMmapFile(path)
+ if err != nil {
+ return checkErr(err)
+ }
+ defer inputFile.Close()
+ buf = inputFile.Bytes()
+ }
if err := os.MkdirAll(outputDir, 0o777); err != nil {
return checkErr(fmt.Errorf("create output dir: %w", err))
}
- return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
+ return checkErr(backfill(5000, buf, outputDir, humanReadable, quiet, maxBlockDuration, customLabels))
}
func displayHistogram(dataType string, datas []int, total int) {
@@ -866,16 +881,16 @@ func displayHistogram(dataType string, datas []int, total int) {
fmt.Println()
}
-func generateBucket(min, max int) (start, end, step int) {
- s := (max - min) / 10
+func generateBucket(minVal, maxVal int) (start, end, step int) {
+ s := (maxVal - minVal) / 10
step = 10
for step < s && step <= 10000 {
step *= 10
}
- start = min - min%step
- end = max - max%step + step
+ start = minVal - minVal%step
+ end = maxVal - maxVal%step + step
return
}
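Note: backfillOpenMetrics above now accepts input from a named pipe or character device, which cannot be mmap-ed, by falling back to a plain read. A small sketch of the mode check it relies on, with an illustrative helper name and path:

package main

import (
	"fmt"
	"os"
)

// isPipeOrCharDevice reports whether a path refers to something that cannot
// be memory-mapped and must therefore be read sequentially.
func isPipeOrCharDevice(path string) (bool, error) {
	info, err := os.Stat(path)
	if err != nil {
		return false, err
	}
	return info.Mode()&(os.ModeNamedPipe|os.ModeCharDevice) != 0, nil
}

func main() {
	ok, err := isPipeOrCharDevice("/dev/stdin") // illustrative path
	fmt.Println(ok, err)
}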
diff --git a/cmd/promtool/tsdb_posix_test.go b/cmd/promtool/tsdb_posix_test.go
new file mode 100644
index 0000000000..8a83aead70
--- /dev/null
+++ b/cmd/promtool/tsdb_posix_test.go
@@ -0,0 +1,69 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !windows
+
+package main
+
+import (
+ "bytes"
+ "io"
+ "math"
+ "os"
+ "path"
+ "syscall"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+
+ "github.com/prometheus/prometheus/tsdb"
+)
+
+func TestTSDBDumpOpenMetricsRoundTripPipe(t *testing.T) {
+ initialMetrics, err := os.ReadFile("testdata/dump-openmetrics-roundtrip-test.prom")
+ require.NoError(t, err)
+ initialMetrics = normalizeNewLine(initialMetrics)
+
+ pipeDir := t.TempDir()
+ dbDir := t.TempDir()
+
+ // create pipe
+ pipe := path.Join(pipeDir, "pipe")
+ err = syscall.Mkfifo(pipe, 0o666)
+ require.NoError(t, err)
+
+ go func() {
+ // open pipe to write
+ in, err := os.OpenFile(pipe, os.O_WRONLY, os.ModeNamedPipe)
+ require.NoError(t, err)
+ defer func() { require.NoError(t, in.Close()) }()
+ _, err = io.Copy(in, bytes.NewReader(initialMetrics))
+ require.NoError(t, err)
+ }()
+
+ // Import samples from OM format
+ code := backfillOpenMetrics(pipe, dbDir, false, false, 2*time.Hour, map[string]string{})
+ require.Equal(t, 0, code)
+ db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
+ require.NoError(t, err)
+ t.Cleanup(func() {
+ require.NoError(t, db.Close())
+ })
+
+ // Dump the blocks into OM format
+ dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
+
+ // Should get back the initial metrics.
+ require.Equal(t, string(initialMetrics), dumpedMetrics)
+}
diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go
index 75089b168b..e745a3fe7a 100644
--- a/cmd/promtool/tsdb_test.go
+++ b/cmd/promtool/tsdb_test.go
@@ -20,6 +20,7 @@ import (
"math"
"os"
"runtime"
+ "slices"
"strings"
"testing"
"time"
@@ -31,6 +32,7 @@ import (
)
func TestGenerateBucket(t *testing.T) {
+ t.Parallel()
tcs := []struct {
min, max int
start, end, step int
@@ -54,7 +56,7 @@ func TestGenerateBucket(t *testing.T) {
}
// getDumpedSamples dumps samples and returns them.
-func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
+func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
t.Helper()
oldStdout := os.Stdout
@@ -63,8 +65,8 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin
err := dumpSamples(
context.Background(),
- path,
- t.TempDir(),
+ databasePath,
+ sandboxDirRoot,
mint,
maxt,
match,
@@ -95,13 +97,15 @@ func TestTSDBDump(t *testing.T) {
heavy_metric{foo="bar"} 5 4 3 2 1
heavy_metric{foo="foo"} 5 4 3 2 1
`)
+ t.Cleanup(func() { storage.Close() })
tests := []struct {
- name string
- mint int64
- maxt int64
- match []string
- expectedDump string
+ name string
+ mint int64
+ maxt int64
+ sandboxDirRoot string
+ match []string
+ expectedDump string
}{
{
name: "default match",
@@ -110,6 +114,14 @@ func TestTSDBDump(t *testing.T) {
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
+ {
+ name: "default match with sandbox dir root set",
+ mint: math.MinInt64,
+ maxt: math.MaxInt64,
+ sandboxDirRoot: t.TempDir(),
+ match: []string{"{__name__=~'(?s:.*)'}"},
+ expectedDump: "testdata/dump-test-1.prom",
+ },
{
name: "same matcher twice",
mint: math.MinInt64,
@@ -148,28 +160,51 @@ func TestTSDBDump(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match, formatSeriesSet)
+ dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, tt.mint, tt.maxt, tt.match, formatSeriesSet)
expectedMetrics, err := os.ReadFile(tt.expectedDump)
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
- // even though in case of one matcher samples are not sorted, the order in the cases above should stay the same.
- require.Equal(t, string(expectedMetrics), dumpedMetrics)
+ // Sort both, because Prometheus does not guarantee the output order.
+ require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
})
}
}
+func sortLines(buf string) string {
+ lines := strings.Split(buf, "\n")
+ slices.Sort(lines)
+ return strings.Join(lines, "\n")
+}
+
func TestTSDBDumpOpenMetrics(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 1m
my_counter{foo="bar", baz="abc"} 1 2 3 4 5
my_gauge{bar="foo", abc="baz"} 9 8 0 4 7
`)
+ t.Cleanup(func() { storage.Close() })
- expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
- require.NoError(t, err)
- expectedMetrics = normalizeNewLine(expectedMetrics)
- dumpedMetrics := getDumpedSamples(t, storage.Dir(), math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
- require.Equal(t, string(expectedMetrics), dumpedMetrics)
+ tests := []struct {
+ name string
+ sandboxDirRoot string
+ }{
+ {
+ name: "default match",
+ },
+ {
+ name: "default match with sandbox dir root set",
+ sandboxDirRoot: t.TempDir(),
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
+ require.NoError(t, err)
+ expectedMetrics = normalizeNewLine(expectedMetrics)
+ dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
+ require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
+ })
+ }
}
func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
@@ -179,7 +214,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
dbDir := t.TempDir()
// Import samples from OM format
- err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour)
+ err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour, map[string]string{})
require.NoError(t, err)
db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
require.NoError(t, err)
@@ -188,7 +223,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
})
// Dump the blocks into OM format
- dumpedMetrics := getDumpedSamples(t, dbDir, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
+ dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
// Should get back the initial metrics.
require.Equal(t, string(initialMetrics), dumpedMetrics)
diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go
index 5451c5296c..4910a0b1a6 100644
--- a/cmd/promtool/unittest.go
+++ b/cmd/promtool/unittest.go
@@ -18,6 +18,8 @@ import (
"encoding/json"
"errors"
"fmt"
+ "io"
+ "math"
"os"
"path/filepath"
"sort"
@@ -25,11 +27,11 @@ import (
"strings"
"time"
- "github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/grafana/regexp"
"github.com/nsf/jsondiff"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/model/histogram"
@@ -39,12 +41,18 @@ import (
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
+ "github.com/prometheus/prometheus/util/junitxml"
)
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
-func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
+func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
+ return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, ignoreUnknownFields, files...)
+}
+
+func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
failed := false
+ junit := &junitxml.JUnitXML{}
var run *regexp.Regexp
if runStrings != nil {
@@ -52,7 +60,7 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
for _, f := range files {
- if errs := ruleUnitTest(f, queryOpts, run, diffFlag); errs != nil {
+ if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, ignoreUnknownFields, junit.Suite(f)); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@@ -64,25 +72,30 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
fmt.Println()
}
+ err := junit.WriteXML(results)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "failed to write JUnit XML: %s\n", err)
+ }
if failed {
return failureExitCode
}
return successExitCode
}
-func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error {
- fmt.Println("Unit Testing: ", filename)
-
+func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug, ignoreUnknownFields bool, ts *junitxml.TestSuite) []error {
b, err := os.ReadFile(filename)
if err != nil {
+ ts.Abort(err)
return []error{err}
}
var unitTestInp unitTestFile
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
+ ts.Abort(err)
return []error{err}
}
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
+ ts.Abort(err)
return []error{err}
}
@@ -91,29 +104,38 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg
}
evalInterval := time.Duration(unitTestInp.EvaluationInterval)
-
+ ts.Settime(time.Now().Format("2006-01-02T15:04:05"))
// Giving number for groups mentioned in the file for ordering.
// Lower number group should be evaluated before higher number group.
groupOrderMap := make(map[string]int)
for i, gn := range unitTestInp.GroupEvalOrder {
if _, ok := groupOrderMap[gn]; ok {
- return []error{fmt.Errorf("group name repeated in evaluation order: %s", gn)}
+ err := fmt.Errorf("group name repeated in evaluation order: %s", gn)
+ ts.Abort(err)
+ return []error{err}
}
groupOrderMap[gn] = i
}
// Testing.
var errs []error
- for _, t := range unitTestInp.Tests {
+ for i, t := range unitTestInp.Tests {
if !matchesRun(t.TestGroupName, run) {
continue
}
-
+ testname := t.TestGroupName
+ if testname == "" {
+ testname = fmt.Sprintf("unnamed#%d", i)
+ }
+ tc := ts.Case(testname)
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
- ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...)
+ ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, ignoreUnknownFields, unitTestInp.FuzzyCompare, unitTestInp.RuleFiles...)
if ers != nil {
+ for _, e := range ers {
+ tc.Fail(e.Error())
+ }
errs = append(errs, ers...)
}
}
@@ -138,6 +160,7 @@ type unitTestFile struct {
EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"`
GroupEvalOrder []string `yaml:"group_eval_order"`
Tests []testGroup `yaml:"tests"`
+ FuzzyCompare bool `yaml:"fuzzy_compare,omitempty"`
}
// resolveAndGlobFilepaths joins all relative paths in a configuration
@@ -176,7 +199,14 @@ type testGroup struct {
}
// test performs the unit tests.
-func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) {
+func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug, ignoreUnknownFields, fuzzyCompare bool, ruleFiles ...string) (outErr []error) {
+ if debug {
+ testStart := time.Now()
+ fmt.Printf("DEBUG: Starting test %s\n", testname)
+ defer func() {
+ fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart))
+ }()
+ }
// Setup testing suite.
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts)
if err != nil {
@@ -195,11 +225,11 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
QueryFunc: rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()),
Appendable: suite.Storage(),
Context: context.Background(),
- NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
- Logger: log.NewNopLogger(),
+ NotifyFunc: func(_ context.Context, _ string, _ ...*rules.Alert) {},
+ Logger: promslog.NewNopLogger(),
}
m := rules.NewManager(opts)
- groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...)
+ groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ignoreUnknownFields, ruleFiles...)
if ers != nil {
return ers
}
@@ -209,6 +239,14 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
mint := time.Unix(0, 0).UTC()
maxt := mint.Add(tg.maxEvalTime())
+ // Optional fuzzy comparison of floating point values.
+ var compareFloat64 cmp.Option = cmp.Options{}
+ if fuzzyCompare {
+ compareFloat64 = cmp.Comparer(func(x, y float64) bool {
+ return x == y || math.Nextafter(x, math.Inf(-1)) == y || math.Nextafter(x, math.Inf(1)) == y
+ })
+ }
+
// Pre-processing some data for testing alerts.
// All this preparation is so that we can test alerts as we evaluate the rules.
// This avoids storing them in memory, as the number of evals might be high.
@@ -283,12 +321,8 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
return errs
}
- for {
- if !(curr < len(alertEvalTimes) && ts.Sub(mint) <= time.Duration(alertEvalTimes[curr]) &&
- time.Duration(alertEvalTimes[curr]) < ts.Add(evalInterval).Sub(mint)) {
- break
- }
-
+ for curr < len(alertEvalTimes) && ts.Sub(mint) <= time.Duration(alertEvalTimes[curr]) &&
+ time.Duration(alertEvalTimes[curr]) < ts.Add(evalInterval).Sub(mint) {
// We need to check alerts for this time.
// If 'ts <= `eval_time=alertEvalTimes[curr]` < ts+evalInterval'
// then we compare alerts with the Eval at `ts`.
@@ -346,7 +380,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
sort.Sort(gotAlerts)
sort.Sort(expAlerts)
- if !cmp.Equal(expAlerts, gotAlerts, cmp.Comparer(labels.Equal)) {
+ if !cmp.Equal(expAlerts, gotAlerts, cmp.Comparer(labels.Equal), compareFloat64) {
var testName string
if tg.TestGroupName != "" {
testName = fmt.Sprintf(" name: %s,\n", tg.TestGroupName)
@@ -454,12 +488,38 @@ Outer:
sort.Slice(gotSamples, func(i, j int) bool {
return labels.Compare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
})
- if !cmp.Equal(expSamples, gotSamples, cmp.Comparer(labels.Equal)) {
+ if !cmp.Equal(expSamples, gotSamples, cmp.Comparer(labels.Equal), compareFloat64) {
errs = append(errs, fmt.Errorf(" expr: %q, time: %s,\n exp: %v\n got: %v", testCase.Expr,
testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
}
}
+ if debug {
+ ts := tg.maxEvalTime()
+ // Potentially a test can be specified at a time with fractional seconds,
+ // which PromQL cannot represent, so round up to the next whole second.
+ ts = (ts + time.Second).Truncate(time.Second)
+ expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts)
+ q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts))
+ if err != nil {
+ fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err)
+ return errs
+ }
+ res := q.Exec(suite.Context())
+ if res.Err != nil {
+ fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err)
+ return errs
+ }
+ switch v := res.Value.(type) {
+ case promql.Matrix:
+ fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts)
+ fmt.Println(v.String())
+ default:
+ fmt.Printf("DEBUG: Got unexpected type %T\n", v)
+ return errs
+ }
+ }
+
if len(errs) > 0 {
return errs
}
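Note: the fuzzy_compare option wired through above treats two floats as equal when they are identical or exactly one representable value (one ULP) apart, via math.Nextafter. A standalone sketch of that comparer, using the values from the new testdata files as examples:

package main

import (
	"fmt"
	"math"
)

// fuzzyEqual mirrors the cmp.Comparer installed when fuzzy_compare is true:
// x and y match if they are equal or one ULP apart in either direction.
func fuzzyEqual(x, y float64) bool {
	return x == y || math.Nextafter(x, math.Inf(-1)) == y || math.Nextafter(x, math.Inf(1)) == y
}

func main() {
	fmt.Println(fuzzyEqual(3, 2.9999999999999996)) // true: one ULP below 3
	fmt.Println(fuzzyEqual(3, 3.0000000000000004)) // true: one ULP above 3
	fmt.Println(fuzzyEqual(3, 2.9999999999999987)) // false: several ULPs away
}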
diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go
index 2dbd5a4e51..566e0acbc6 100644
--- a/cmd/promtool/unittest_test.go
+++ b/cmd/promtool/unittest_test.go
@@ -14,14 +14,19 @@
package main
import (
+ "bytes"
+ "encoding/xml"
+ "fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql/promqltest"
+ "github.com/prometheus/prometheus/util/junitxml"
)
func TestRulesUnitTest(t *testing.T) {
+ t.Parallel()
type args struct {
files []string
}
@@ -125,25 +130,75 @@ func TestRulesUnitTest(t *testing.T) {
want: 0,
},
}
+ reuseFiles := []string{}
+ reuseCount := [2]int{}
for _, tt := range tests {
+ if (tt.queryOpts == promqltest.LazyLoaderOpts{
+ EnableNegativeOffset: true,
+ } || tt.queryOpts == promqltest.LazyLoaderOpts{
+ EnableAtModifier: true,
+ }) {
+ reuseFiles = append(reuseFiles, tt.args.files...)
+ reuseCount[tt.want] += len(tt.args.files)
+ }
t.Run(tt.name, func(t *testing.T) {
- if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want {
+ t.Parallel()
+ if got := RulesUnitTest(tt.queryOpts, nil, false, false, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
+ t.Run("Junit xml output ", func(t *testing.T) {
+ t.Parallel()
+ var buf bytes.Buffer
+ if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, false, reuseFiles...); got != 1 {
+ t.Errorf("RulesUnitTestResults() = %v, want 1", got)
+ }
+ var test junitxml.JUnitXML
+ output := buf.Bytes()
+ err := xml.Unmarshal(output, &test)
+ if err != nil {
+ fmt.Println("error in decoding XML:", err)
+ return
+ }
+ var total int
+ var passes int
+ var failures int
+ var cases int
+ total = len(test.Suites)
+ if total != len(reuseFiles) {
+ t.Errorf("JUnit output had %d testsuite elements; expected %d\n", total, len(reuseFiles))
+ }
+
+ for _, i := range test.Suites {
+ if i.FailureCount == 0 {
+ passes++
+ } else {
+ failures++
+ }
+ cases += len(i.Cases)
+ }
+ if total != passes+failures {
+ t.Errorf("JUnit output mismatch: Total testsuites (%d) does not equal the sum of passes (%d) and failures (%d).", total, passes, failures)
+ }
+ if cases < total {
+ t.Errorf("JUnit output had %d suites without test cases\n", total-cases)
+ }
+ })
}
func TestRulesUnitTestRun(t *testing.T) {
+ t.Parallel()
type args struct {
run []string
files []string
}
tests := []struct {
- name string
- args args
- queryOpts promqltest.LazyLoaderOpts
- want int
+ name string
+ args args
+ queryOpts promqltest.LazyLoaderOpts
+ want int
+ ignoreUnknownFields bool
}{
{
name: "Test all without run arg",
@@ -177,10 +232,42 @@ func TestRulesUnitTestRun(t *testing.T) {
},
want: 1,
},
+ {
+ name: "Test all with extra fields",
+ args: args{
+ files: []string{"./testdata/rules_run_extrafields.yml"},
+ },
+ ignoreUnknownFields: true,
+ want: 0,
+ },
+ {
+ name: "Test precise floating point comparison expected failure",
+ args: args{
+ files: []string{"./testdata/rules_run_no_fuzzy.yml"},
+ },
+ want: 1,
+ },
+ {
+ name: "Test fuzzy floating point comparison correct match",
+ args: args{
+ run: []string{"correct"},
+ files: []string{"./testdata/rules_run_fuzzy.yml"},
+ },
+ want: 0,
+ },
+ {
+ name: "Test fuzzy floating point comparison wrong match",
+ args: args{
+ run: []string{"wrong"},
+ files: []string{"./testdata/rules_run_fuzzy.yml"},
+ },
+ want: 1,
+ },
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...)
+ t.Parallel()
+ got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.ignoreUnknownFields, tt.args.files...)
require.Equal(t, tt.want, got)
})
}
diff --git a/config/config.go b/config/config.go
index c924e30989..12ca828ae8 100644
--- a/config/config.go
+++ b/config/config.go
@@ -16,27 +16,29 @@ package config
import (
"errors"
"fmt"
+ "log/slog"
+ "mime"
"net/url"
"os"
"path/filepath"
+ "slices"
"sort"
"strconv"
"strings"
"time"
"github.com/alecthomas/units"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
- "github.com/prometheus/common/sigv4"
+ "github.com/prometheus/sigv4"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/storage/remote/azuread"
+ "github.com/prometheus/prometheus/storage/remote/googleiam"
)
var (
@@ -67,7 +69,7 @@ var (
)
// Load parses the YAML input s into a Config.
-func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
+func Load(s string, logger *slog.Logger) (*Config, error) {
cfg := &Config{}
// If the entire config body is empty the UnmarshalYAML method is
// never called. We thus have to set the DefaultConfig at the entry
@@ -79,10 +81,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
return nil, err
}
- if !expandExternalLabels {
- return cfg, nil
- }
-
b := labels.NewScratchBuilder(0)
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
newV := os.Expand(v.Value, func(s string) string {
@@ -92,26 +90,41 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
if v := os.Getenv(s); v != "" {
return v
}
- level.Warn(logger).Log("msg", "Empty environment variable", "name", s)
+ logger.Warn("Empty environment variable", "name", s)
return ""
})
if newV != v.Value {
- level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV)
+ logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV)
}
// Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024
b.Add(v.Name, newV)
})
- cfg.GlobalConfig.ExternalLabels = b.Labels()
+ if !b.Labels().IsEmpty() {
+ cfg.GlobalConfig.ExternalLabels = b.Labels()
+ }
+
+ switch cfg.OTLPConfig.TranslationStrategy {
+ case UnderscoreEscapingWithSuffixes:
+ case "":
+ case NoTranslation, NoUTF8EscapingWithSuffixes:
+ if cfg.GlobalConfig.MetricNameValidationScheme == model.LegacyValidation {
+ return nil, fmt.Errorf("OTLP translation strategy %q is not allowed when UTF8 is disabled", cfg.OTLPConfig.TranslationStrategy)
+ }
+ default:
+ return nil, fmt.Errorf("unsupported OTLP translation strategy %q", cfg.OTLPConfig.TranslationStrategy)
+ }
+ cfg.loaded = true
return cfg, nil
}
-// LoadFile parses the given YAML file into a Config.
-func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) {
+// LoadFile parses and validates the given YAML file into a read-only Config.
+// Callers should never write to or shallow copy the returned Config.
+func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) {
content, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
- cfg, err := Load(string(content), expandExternalLabels, logger)
+ cfg, err := Load(string(content), logger)
if err != nil {
return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err)
}
@@ -139,6 +152,8 @@ var (
// DefaultConfig is the default top-level configuration.
DefaultConfig = Config{
GlobalConfig: DefaultGlobalConfig,
+ Runtime: DefaultRuntimeConfig,
+ OTLPConfig: DefaultOTLPConfig,
}
// DefaultGlobalConfig is the default global configuration.
@@ -149,24 +164,30 @@ var (
RuleQueryOffset: model.Duration(0 * time.Minute),
// When native histogram feature flag is enabled, ScrapeProtocols default
// changes to DefaultNativeHistogramScrapeProtocols.
- ScrapeProtocols: DefaultScrapeProtocols,
+ ScrapeProtocols: DefaultScrapeProtocols,
+ ConvertClassicHistogramsToNHCB: false,
+ AlwaysScrapeClassicHistograms: false,
+ MetricNameValidationScheme: model.UTF8Validation,
+ MetricNameEscapingScheme: model.AllowUTF8,
}
DefaultRuntimeConfig = RuntimeConfig{
// Go runtime tuning.
- GoGC: 75,
+ GoGC: getGoGC(),
}
- // DefaultScrapeConfig is the default scrape configuration.
+ // DefaultScrapeConfig is the default scrape configuration. Users of this
+ // default MUST call Validate() on the config after creation, even if it's
+ // used unaltered, to check for parameter correctness and fill out default
+ // values that can't be set inline in this declaration.
DefaultScrapeConfig = ScrapeConfig{
- // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
- ScrapeClassicHistograms: false,
- MetricsPath: "/metrics",
- Scheme: "http",
- HonorLabels: false,
- HonorTimestamps: true,
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- EnableCompression: true,
+ // ScrapeTimeout, ScrapeInterval, ScrapeProtocols, AlwaysScrapeClassicHistograms, and ConvertClassicHistogramsToNHCB default to the configured globals.
+ MetricsPath: "/metrics",
+ Scheme: "http",
+ HonorLabels: false,
+ HonorTimestamps: true,
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+ EnableCompression: true,
}
// DefaultAlertmanagerConfig is the default alertmanager configuration.
@@ -177,13 +198,18 @@ var (
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
+ DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{
+ FollowRedirects: true,
+ EnableHTTP2: false,
+ }
+
// DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second),
ProtobufMessage: RemoteWriteProtoMsgV1,
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
- HTTPClientConfig: config.DefaultHTTPClientConfig,
+ HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig,
}
// DefaultQueueConfig is the default remote queue configuration.
@@ -215,6 +241,7 @@ var (
// DefaultRemoteReadConfig is the default remote read configuration.
DefaultRemoteReadConfig = RemoteReadConfig{
RemoteTimeout: model.Duration(1 * time.Minute),
+ ChunkedReadLimit: DefaultChunkedReadLimit,
HTTPClientConfig: config.DefaultHTTPClientConfig,
FilterExternalLabels: true,
}
@@ -227,6 +254,11 @@ var (
DefaultExemplarsConfig = ExemplarsConfig{
MaxExemplars: 100000,
}
+
+ // DefaultOTLPConfig is the default OTLP configuration.
+ DefaultOTLPConfig = OTLPConfig{
+ TranslationStrategy: UnderscoreEscapingWithSuffixes,
+ }
)
// Config is the top-level configuration for Prometheus's config files.
@@ -242,9 +274,13 @@ type Config struct {
RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"`
RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"`
+ OTLPConfig OTLPConfig `yaml:"otlp,omitempty"`
+
+ loaded bool // Certain methods require the configuration to have been validated via Load or LoadFile.
}
// SetDirectory joins any relative file paths with dir.
+// This method writes to config, and it's not concurrency safe.
func (c *Config) SetDirectory(dir string) {
c.GlobalConfig.SetDirectory(dir)
c.AlertingConfig.SetDirectory(dir)
@@ -274,24 +310,26 @@ func (c Config) String() string {
return string(b)
}
-// GetScrapeConfigs returns the scrape configurations.
+// GetScrapeConfigs returns the read-only, validated scrape configurations including
+// the ones from the scrape_config_files.
+// This method does not write to config, and it's concurrency safe (the pointer receiver is for efficiency).
+// This method also assumes the Config was created by the Load or LoadFile function; it returns an error
+// if it was not. We can't re-validate or apply globals here due to races; see
+// https://github.com/prometheus/prometheus/issues/15538 for details.
func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
- scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
+ if !c.loaded {
+ // Programmatic error: fail early here, before the missing global defaults cause more confusing errors downstream.
+ return nil, errors.New("scrape config cannot be fetched, main config was not validated and loaded correctly; should not happen")
+ }
+ scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
jobNames := map[string]string{}
for i, scfg := range c.ScrapeConfigs {
- // We do these checks for library users that would not call validate in
- // Unmarshal.
- if err := scfg.Validate(c.GlobalConfig); err != nil {
- return nil, err
- }
-
- if _, ok := jobNames[scfg.JobName]; ok {
- return nil, fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
- }
jobNames[scfg.JobName] = "main config file"
scfgs[i] = scfg
}
+
+ // Re-read and validate the dynamic scrape config rules.
for _, pat := range c.ScrapeConfigFiles {
fs, err := filepath.Glob(pat)
if err != nil {
@@ -327,6 +365,7 @@ func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
+// NOTE: This method should not be used outside of this package. Use Load or LoadFile instead.
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
*c = DefaultConfig
// We want to set c to the defaults and then overwrite it with the input.
@@ -347,8 +386,6 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
// We have to restore it here.
if c.Runtime.isZero() {
c.Runtime = DefaultRuntimeConfig
- // Use the GOGC env var value if the runtime section is empty.
- c.Runtime.GoGC = getGoGCEnv()
}
for _, rf := range c.RuleFiles {
@@ -363,18 +400,18 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
}
- // Do global overrides and validate unique names.
+ // Do global overrides and validation.
jobNames := map[string]struct{}{}
for _, scfg := range c.ScrapeConfigs {
if err := scfg.Validate(c.GlobalConfig); err != nil {
return err
}
-
if _, ok := jobNames[scfg.JobName]; ok {
return fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
}
jobNames[scfg.JobName] = struct{}{}
}
+
rwNames := map[string]struct{}{}
for _, rwcfg := range c.RemoteWriteConfigs {
if rwcfg == nil {
@@ -418,6 +455,8 @@ type GlobalConfig struct {
RuleQueryOffset model.Duration `yaml:"rule_query_offset,omitempty"`
// File to which PromQL queries are logged.
QueryLogFile string `yaml:"query_log_file,omitempty"`
+ // File to which scrape failures are logged.
+ ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The labels to add to any timeseries that this Prometheus instance scrapes.
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
// An uncompressed response body larger than this many bytes will cause the
@@ -441,6 +480,17 @@ type GlobalConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+ // Allow UTF8 Metric and Label Names. Can be blank in config files but must
+ // have a value if a GlobalConfig is created programmatically.
+ MetricNameValidationScheme model.ValidationScheme `yaml:"metric_name_validation_scheme,omitempty"`
+ // Metric name escaping mode to request through content negotiation. Can be
+ // blank in config files but must have a value if a ScrapeConfig is created
+ // programmatically.
+ MetricNameEscapingScheme string `yaml:"metric_name_escaping_scheme,omitempty"`
+ // Whether to convert all scraped classic histograms into native histograms with custom buckets.
+ ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"`
+ // Whether to scrape a classic histogram, even if it is also exposed as a native histogram.
+ AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
@@ -461,15 +511,30 @@ func (s ScrapeProtocol) Validate() error {
return nil
}
+// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol.
+func (s ScrapeProtocol) HeaderMediaType() string {
+ if _, ok := ScrapeProtocolsHeaders[s]; !ok {
+ return ""
+ }
+ mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s])
+ if err != nil {
+ return ""
+ }
+ return mediaType
+}
+
var (
PrometheusProto ScrapeProtocol = "PrometheusProto"
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
+ PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
+ UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
PrometheusText0_0_4: "text/plain;version=0.0.4",
+ PrometheusText1_0_0: "text/plain;version=1.0.0",
OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1",
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
@@ -479,6 +544,7 @@ var (
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
+ PrometheusText1_0_0,
PrometheusText0_0_4,
}
@@ -490,6 +556,7 @@ var (
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
+ PrometheusText1_0_0,
PrometheusText0_0_4,
}
)
@@ -515,6 +582,7 @@ func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error {
// SetDirectory joins any relative file paths with dir.
func (c *GlobalConfig) SetDirectory(dir string) {
c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile)
+ c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -577,13 +645,33 @@ func (c *GlobalConfig) isZero() bool {
c.EvaluationInterval == 0 &&
c.RuleQueryOffset == 0 &&
c.QueryLogFile == "" &&
- c.ScrapeProtocols == nil
+ c.ScrapeFailureLogFile == "" &&
+ c.ScrapeProtocols == nil &&
+ !c.ConvertClassicHistogramsToNHCB &&
+ !c.AlwaysScrapeClassicHistograms
}
+const DefaultGoGCPercentage = 75
+
// RuntimeConfig configures the values for the process behavior.
type RuntimeConfig struct {
// The Go garbage collection target percentage.
GoGC int `yaml:"gogc,omitempty"`
+
+ // Below are guidelines for adding a new field:
+ //
+ // For config that shouldn't change after startup, you might want to use
+ // flags https://prometheus.io/docs/prometheus/latest/command-line/prometheus/.
+ //
+ // Consider when the new field is first applied: at the very beginning of instance
+ // startup, after the TSDB is loaded etc. See https://github.com/prometheus/prometheus/pull/16491
+ // for an example.
+ //
+ // Provide a test covering various scenarios: empty config file, empty or incomplete runtime
+ // config block, precedence over other inputs (e.g., env vars, if applicable) etc.
+ // See TestRuntimeGOGCConfig (or https://github.com/prometheus/prometheus/pull/15238).
+ // The test should also verify behavior on reloads, since this config should be
+ // adjustable at runtime.
}
// isZero returns true iff the global config is the zero value.
@@ -614,10 +702,19 @@ type ScrapeConfig struct {
// The protocols to negotiate during a scrape. It tells clients what
// protocol are accepted by Prometheus and with what preference (most wanted is first).
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
- // OpenMetricsText1.0.0, PrometheusText0.0.4.
+ // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"`
- // Whether to scrape a classic histogram that is also exposed as a native histogram.
- ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"`
+ // The fallback protocol to use when the target's Content-Type is missing,
+ // blank, or not one of the expected values.
+ // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
+ // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
+ ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"`
+ // Whether to scrape a classic histogram, even if it is also exposed as a native histogram.
+ AlwaysScrapeClassicHistograms *bool `yaml:"always_scrape_classic_histograms,omitempty"`
+ // Whether to convert all scraped classic histograms into a native histogram with custom buckets.
+ ConvertClassicHistogramsToNHCB *bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"`
+ // File to which scrape failures are logged.
+ ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The HTTP resource path on which to fetch metrics from targets.
MetricsPath string `yaml:"metrics_path,omitempty"`
// The URL scheme with which to fetch metrics from targets.
@@ -651,6 +748,13 @@ type ScrapeConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+ // Allow UTF8 Metric and Label Names. Can be blank in config files but must
+ // have a value if a ScrapeConfig is created programmatically.
+ MetricNameValidationScheme model.ValidationScheme `yaml:"metric_name_validation_scheme,omitempty"`
+ // Metric name escaping mode to request through content negotiation. Can be
+ // blank in config files but must have a value if a ScrapeConfig is created
+ // programmatically.
+ MetricNameEscapingScheme string `yaml:"metric_name_escaping_scheme,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@@ -668,6 +772,7 @@ type ScrapeConfig struct {
func (c *ScrapeConfig) SetDirectory(dir string) {
c.ServiceDiscoveryConfigs.SetDirectory(dir)
c.HTTPClientConfig.SetDirectory(dir)
+ c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -749,6 +854,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
+ if c.ScrapeFailureLogFile == "" {
+ c.ScrapeFailureLogFile = globalConfig.ScrapeFailureLogFile
+ }
if c.ScrapeProtocols == nil {
c.ScrapeProtocols = globalConfig.ScrapeProtocols
@@ -757,6 +865,70 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
+ if c.ScrapeFallbackProtocol != "" {
+ if err := c.ScrapeFallbackProtocol.Validate(); err != nil {
+ return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err)
+ }
+ }
+
+ //nolint:staticcheck
+ if model.NameValidationScheme != model.UTF8Validation {
+ return errors.New("model.NameValidationScheme must be set to UTF8")
+ }
+
+ switch globalConfig.MetricNameValidationScheme {
+ case model.UnsetValidation:
+ globalConfig.MetricNameValidationScheme = model.UTF8Validation
+ case model.LegacyValidation, model.UTF8Validation:
+ default:
+ return fmt.Errorf("unknown global name validation method specified, must be either '', 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
+ }
+ // The scrape config validation scheme matches the global one if left blank.
+ switch c.MetricNameValidationScheme {
+ case model.UnsetValidation:
+ c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
+ case model.LegacyValidation, model.UTF8Validation:
+ default:
+ return fmt.Errorf("unknown scrape config name validation method specified, must be either '', 'legacy' or 'utf8', got %s", c.MetricNameValidationScheme)
+ }
+
+ // Escaping scheme is based on the validation scheme if left blank.
+ switch globalConfig.MetricNameEscapingScheme {
+ case "":
+ if globalConfig.MetricNameValidationScheme == model.LegacyValidation {
+ globalConfig.MetricNameEscapingScheme = model.EscapeUnderscores
+ } else {
+ globalConfig.MetricNameEscapingScheme = model.AllowUTF8
+ }
+ case model.AllowUTF8, model.EscapeUnderscores, model.EscapeDots, model.EscapeValues:
+ default:
+ return fmt.Errorf("unknown global name escaping method specified, must be one of '%s', '%s', '%s', or '%s', got %q", model.AllowUTF8, model.EscapeUnderscores, model.EscapeDots, model.EscapeValues, globalConfig.MetricNameEscapingScheme)
+ }
+
+ if c.MetricNameEscapingScheme == "" {
+ c.MetricNameEscapingScheme = globalConfig.MetricNameEscapingScheme
+ }
+
+ switch c.MetricNameEscapingScheme {
+ case model.AllowUTF8:
+ if c.MetricNameValidationScheme != model.UTF8Validation {
+ return errors.New("utf8 metric names requested but validation scheme is not set to UTF8")
+ }
+ case model.EscapeUnderscores, model.EscapeDots, model.EscapeValues:
+ default:
+ return fmt.Errorf("unknown scrape config name escaping method specified, must be one of '%s', '%s', '%s', or '%s', got %q", model.AllowUTF8, model.EscapeUnderscores, model.EscapeDots, model.EscapeValues, c.MetricNameEscapingScheme)
+ }
+
+ if c.ConvertClassicHistogramsToNHCB == nil {
+ global := globalConfig.ConvertClassicHistogramsToNHCB
+ c.ConvertClassicHistogramsToNHCB = &global
+ }
+
+ if c.AlwaysScrapeClassicHistograms == nil {
+ global := globalConfig.AlwaysScrapeClassicHistograms
+ c.AlwaysScrapeClassicHistograms = &global
+ }
+
return nil
}
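
Illustration, not part of the patch: per the new doc comment on DefaultScrapeConfig, code that builds a ScrapeConfig programmatically is expected to call Validate so the global-derived defaults (name validation and escaping schemes, classic-histogram pointers, protocols) get filled in. A rough sketch of that pattern, with a made-up job name:

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/config"
)

func main() {
	sc := config.DefaultScrapeConfig // value copy of the package-level default
	sc.JobName = "example"           // hypothetical job name, for illustration only

	// Validate fills defaults that can't be expressed inline in the
	// DefaultScrapeConfig declaration: scrape protocols, name validation and
	// escaping schemes, and the classic-histogram pointer fields.
	if err := sc.Validate(config.DefaultGlobalConfig); err != nil {
		panic(err)
	}
	fmt.Println(sc.MetricNameValidationScheme)              // inherited from the global default (UTF-8)
	fmt.Println(sc.ConvertClassicHistogramsToNHCBEnabled()) // false unless set globally or per job
}
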
@@ -765,6 +937,35 @@ func (c *ScrapeConfig) MarshalYAML() (interface{}, error) {
return discovery.MarshalYAMLWithInlineConfigs(c)
}
+// ToEscapingScheme wraps the equivalent common library function with the
+// desired default behavior based on the given validation scheme. This is a
+// workaround for third party exporters that don't set the escaping scheme.
+func ToEscapingScheme(s string, v model.ValidationScheme) (model.EscapingScheme, error) {
+ if s == "" {
+ switch v {
+ case model.UTF8Validation:
+ return model.NoEscaping, nil
+ case model.LegacyValidation:
+ return model.UnderscoreEscaping, nil
+ case model.UnsetValidation:
+ return model.NoEscaping, fmt.Errorf("v is unset: %s", v)
+ default:
+ panic(fmt.Errorf("unhandled validation scheme: %s", v))
+ }
+ }
+ return model.ToEscapingScheme(s)
+}
+
+// ConvertClassicHistogramsToNHCBEnabled returns whether to convert classic histograms to NHCB.
+func (c *ScrapeConfig) ConvertClassicHistogramsToNHCBEnabled() bool {
+ return c.ConvertClassicHistogramsToNHCB != nil && *c.ConvertClassicHistogramsToNHCB
+}
+
+// AlwaysScrapeClassicHistogramsEnabled returns whether to always scrape classic histograms.
+func (c *ScrapeConfig) AlwaysScrapeClassicHistogramsEnabled() bool {
+ return c.AlwaysScrapeClassicHistograms != nil && *c.AlwaysScrapeClassicHistograms
+}
+
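
Sketch, not part of the patch: the ToEscapingScheme wrapper above picks a default escaping behavior from the validation scheme whenever an exporter leaves the scheme blank, and otherwise defers to the common library:

package main

import (
	"fmt"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/config"
)

func main() {
	// Blank escaping scheme: the default is derived from the validation scheme.
	s, _ := config.ToEscapingScheme("", model.UTF8Validation)
	fmt.Println(s) // no escaping; UTF-8 names pass through untouched

	s, _ = config.ToEscapingScheme("", model.LegacyValidation)
	fmt.Println(s) // underscore escaping for legacy name validation

	// Non-blank values are handed to model.ToEscapingScheme unchanged.
	s, err := config.ToEscapingScheme(model.EscapeDots, model.UTF8Validation)
	fmt.Println(s, err)
}
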
// StorageConfig configures runtime reloadable configuration options.
type StorageConfig struct {
TSDBConfig *TSDBConfig `yaml:"tsdb,omitempty"`
@@ -919,6 +1120,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig {
// AlertmanagerAPIVersion represents a version of the
// github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'.
+// 'v1' is no longer supported.
type AlertmanagerAPIVersion string
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -929,13 +1131,11 @@ func (v *AlertmanagerAPIVersion) UnmarshalYAML(unmarshal func(interface{}) error
return err
}
- for _, supportedVersion := range SupportedAlertmanagerAPIVersions {
- if *v == supportedVersion {
- return nil
- }
+ if !slices.Contains(SupportedAlertmanagerAPIVersions, *v) {
+ return fmt.Errorf("expected Alertmanager api version to be one of %v but got %v", SupportedAlertmanagerAPIVersions, *v)
}
- return fmt.Errorf("expected Alertmanager api version to be one of %v but got %v", SupportedAlertmanagerAPIVersions, *v)
+ return nil
}
const (
@@ -948,7 +1148,7 @@ const (
)
var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{
- AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2,
+ AlertmanagerAPIVersionV2,
}
// AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with.
@@ -1000,7 +1200,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
if httpClientConfigAuthEnabled && c.SigV4Config != nil {
- return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
+ return errors.New("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
}
// Check for users putting URLs in target groups.
@@ -1085,8 +1285,9 @@ func (m RemoteWriteProtoMsgs) String() string {
}
var (
- // RemoteWriteProtoMsgV1 represents the deprecated `prometheus.WriteRequest` protobuf
- // message introduced in the https://prometheus.io/docs/specs/remote_write_spec/.
+ // RemoteWriteProtoMsgV1 represents the `prometheus.WriteRequest` protobuf
+ // message introduced in the https://prometheus.io/docs/specs/remote_write_spec/,
+ // which will eventually be deprecated.
//
// NOTE: This string is used for both HTTP header values and config value, so don't change
// this reference.
@@ -1108,6 +1309,7 @@ type RemoteWriteConfig struct {
Name string `yaml:"name,omitempty"`
SendExemplars bool `yaml:"send_exemplars,omitempty"`
SendNativeHistograms bool `yaml:"send_native_histograms,omitempty"`
+ RoundRobinDNS bool `yaml:"round_robin_dns,omitempty"`
// ProtobufMessage specifies the protobuf message to use against the remote
// receiver as specified in https://prometheus.io/docs/specs/remote_write_spec_2_0/
ProtobufMessage RemoteWriteProtoMsg `yaml:"protobuf_message,omitempty"`
@@ -1119,6 +1321,7 @@ type RemoteWriteConfig struct {
MetadataConfig MetadataConfig `yaml:"metadata_config,omitempty"`
SigV4Config *sigv4.SigV4Config `yaml:"sigv4,omitempty"`
AzureADConfig *azuread.AzureADConfig `yaml:"azuread,omitempty"`
+ GoogleIAMConfig *googleiam.Config `yaml:"google_iam,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@@ -1156,17 +1359,33 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err
}
- httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil ||
- c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
+ return validateAuthConfigs(c)
+}
- if httpClientConfigAuthEnabled && (c.SigV4Config != nil || c.AzureADConfig != nil) {
- return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
+// validateAuthConfigs validates that at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam is configured.
+func validateAuthConfigs(c *RemoteWriteConfig) error {
+ var authConfigured []string
+ if c.HTTPClientConfig.BasicAuth != nil {
+ authConfigured = append(authConfigured, "basic_auth")
}
-
- if c.SigV4Config != nil && c.AzureADConfig != nil {
- return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
+ if c.HTTPClientConfig.Authorization != nil {
+ authConfigured = append(authConfigured, "authorization")
+ }
+ if c.HTTPClientConfig.OAuth2 != nil {
+ authConfigured = append(authConfigured, "oauth2")
+ }
+ if c.SigV4Config != nil {
+ authConfigured = append(authConfigured, "sigv4")
+ }
+ if c.AzureADConfig != nil {
+ authConfigured = append(authConfigured, "azuread")
+ }
+ if c.GoogleIAMConfig != nil {
+ authConfigured = append(authConfigured, "google_iam")
+ }
+ if len(authConfigured) > 1 {
+ return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam must be configured. Currently configured: %v", authConfigured)
}
-
return nil
}
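
Sketch, not part of the patch: the refactored check now names exactly which mutually exclusive auth mechanisms were set. Assuming a remote-write block that configures both basic_auth and sigv4 (all values below are placeholders), unmarshalling should fail with a message listing both:

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"

	"github.com/prometheus/prometheus/config"
)

func main() {
	raw := `
url: http://remote.example/api/v1/write
basic_auth:
  username: user
  password: pass
sigv4:
  region: us-east-1
`
	var rw config.RemoteWriteConfig
	// Expected to report that both basic_auth and sigv4 are configured.
	fmt.Println(yaml.Unmarshal([]byte(raw), &rw))
}
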
@@ -1185,7 +1404,7 @@ func validateHeadersForTracing(headers map[string]string) error {
func validateHeaders(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
- return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter")
+ return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter")
}
if _, ok := reservedHeaders[strings.ToLower(header)]; ok {
return fmt.Errorf("%s is a reserved header. It must not be changed", header)
@@ -1233,13 +1452,20 @@ type MetadataConfig struct {
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
}
+const (
+ // DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame the client allows.
+ // 50MB is the default. This is equivalent to ~100k full XOR chunks with an average labelset.
+ DefaultChunkedReadLimit = 5e+7
+)
+
// RemoteReadConfig is the configuration for reading from remote storage.
type RemoteReadConfig struct {
- URL *config.URL `yaml:"url"`
- RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
- Headers map[string]string `yaml:"headers,omitempty"`
- ReadRecent bool `yaml:"read_recent,omitempty"`
- Name string `yaml:"name,omitempty"`
+ URL *config.URL `yaml:"url"`
+ RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
+ ChunkedReadLimit uint64 `yaml:"chunked_read_limit,omitempty"`
+ Headers map[string]string `yaml:"headers,omitempty"`
+ ReadRecent bool `yaml:"read_recent,omitempty"`
+ Name string `yaml:"name,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@@ -1289,7 +1515,7 @@ func fileErr(filename string, err error) error {
return fmt.Errorf("%q: %w", filePath(filename), err)
}
-func getGoGCEnv() int {
+func getGoGC() int {
goGCEnv := os.Getenv("GOGC")
// If the GOGC env var is set, use the same logic as upstream Go.
if goGCEnv != "" {
@@ -1302,5 +1528,93 @@ func getGoGCEnv() int {
return i
}
}
- return DefaultRuntimeConfig.GoGC
+ return DefaultGoGCPercentage
+}
+
+type translationStrategyOption string
+
+var (
+ // NoUTF8EscapingWithSuffixes will accept metric/label names as they are.
+ // Unit and type suffixes may be added to metric names, according to certain rules.
+ NoUTF8EscapingWithSuffixes translationStrategyOption = "NoUTF8EscapingWithSuffixes"
+ // UnderscoreEscapingWithSuffixes is the default option for translating OTLP to Prometheus.
+ // This option will translate metric name characters that are not alphanumerics/underscores/colons to underscores,
+ // and label name characters that are not alphanumerics/underscores to underscores.
+ // Unit and type suffixes may be appended to metric names, according to certain rules.
+ UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes"
+ // NoTranslation (EXPERIMENTAL): disables all translation of incoming metric
+ // and label names. This offers a way for OTLP users to use native metric names, reducing confusion.
+ //
+ // WARNING: This setting has significant known risks and limitations (see
+ // https://prometheus.io/docs/practices/naming/ for details):
+ // * Impaired UX when using PromQL in plain YAML (e.g. alerts, rules, dashboard, autoscaling configuration).
+ // * Series collisions, which in the best case may result in OOO errors and in the worst case a silently malformed
+ // time series. For instance, you may end up ingesting a `foo.bar` series with unit
+ // `seconds` and a separate `foo.bar` series with unit `milliseconds`.
+ //
+ // As a result, this setting is experimental and currently should not be used in
+ // production systems.
+ //
+ // TODO(ArthurSens): Mention `type-and-unit-labels` feature (https://github.com/prometheus/proposals/pull/39) once released, as potential mitigation of the above risks.
+ NoTranslation translationStrategyOption = "NoTranslation"
+)
+
+// OTLPConfig is the configuration for writing to the OTLP endpoint.
+type OTLPConfig struct {
+ PromoteAllResourceAttributes bool `yaml:"promote_all_resource_attributes,omitempty"`
+ PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"`
+ IgnoreResourceAttributes []string `yaml:"ignore_resource_attributes,omitempty"`
+ TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"`
+ KeepIdentifyingResourceAttributes bool `yaml:"keep_identifying_resource_attributes,omitempty"`
+ ConvertHistogramsToNHCB bool `yaml:"convert_histograms_to_nhcb,omitempty"`
+ // PromoteScopeMetadata controls whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
+ // As per OTel spec, the aforementioned scope metadata should be identifying, i.e. made into metric labels.
+ PromoteScopeMetadata bool `yaml:"promote_scope_metadata,omitempty"`
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface.
+func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
+ *c = DefaultOTLPConfig
+ type plain OTLPConfig
+ if err := unmarshal((*plain)(c)); err != nil {
+ return err
+ }
+
+ if c.PromoteAllResourceAttributes {
+ if len(c.PromoteResourceAttributes) > 0 {
+ return errors.New("'promote_all_resource_attributes' and 'promote_resource_attributes' cannot be configured simultaneously")
+ }
+ if err := sanitizeAttributes(c.IgnoreResourceAttributes, "ignored"); err != nil {
+ return fmt.Errorf("invalid 'ignore_resource_attributes': %w", err)
+ }
+ } else {
+ if len(c.IgnoreResourceAttributes) > 0 {
+ return errors.New("'ignore_resource_attributes' cannot be configured unless 'promote_all_resource_attributes' is true")
+ }
+ if err := sanitizeAttributes(c.PromoteResourceAttributes, "promoted"); err != nil {
+ return fmt.Errorf("invalid 'promote_resource_attributes': %w", err)
+ }
+ }
+
+ return nil
+}
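
Sketch, not part of the patch: the new UnmarshalYAML hook enforces that promote_all_resource_attributes and promote_resource_attributes are mutually exclusive, and that listed attributes are non-blank and unique (attribute names below are placeholders):

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"

	"github.com/prometheus/prometheus/config"
)

func main() {
	var otlp config.OTLPConfig

	// Both promotion knobs set at once: expected to be rejected.
	raw := "promote_all_resource_attributes: true\npromote_resource_attributes: [\"service.name\"]\n"
	fmt.Println(yaml.Unmarshal([]byte(raw), &otlp))

	// Duplicated and blank attribute names: expected to be rejected as well.
	otlp = config.OTLPConfig{}
	raw = "promote_resource_attributes: [\"k8s.pod.name\", \"k8s.pod.name\", \" \"]\n"
	fmt.Println(yaml.Unmarshal([]byte(raw), &otlp))
}
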
+
+func sanitizeAttributes(attributes []string, adjective string) error {
+ seen := map[string]struct{}{}
+ var err error
+ for i, attr := range attributes {
+ attr = strings.TrimSpace(attr)
+ if attr == "" {
+ err = errors.Join(err, fmt.Errorf("empty %s OTel resource attribute", adjective))
+ continue
+ }
+ if _, exists := seen[attr]; exists {
+ err = errors.Join(err, fmt.Errorf("duplicated %s OTel resource attribute %q", adjective, attr))
+ continue
+ }
+
+ seen[attr] = struct{}{}
+ attributes[i] = attr
+ }
+ return err
}
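
Sketch, not part of the patch: tying the OTLP pieces together, Load now also cross-checks the translation strategy against the global name validation scheme. Something like the following, with a throwaway slog logger, would exercise the new check:

package main

import (
	"fmt"
	"io"
	"log/slog"

	"github.com/prometheus/prometheus/config"
)

func main() {
	logger := slog.New(slog.NewTextHandler(io.Discard, nil))

	// NoTranslation depends on UTF-8 names; with legacy validation Load is
	// expected to return the "not allowed when UTF8 is disabled" error.
	bad := "global:\n  metric_name_validation_scheme: legacy\notlp:\n  translation_strategy: NoTranslation\n"
	_, err := config.Load(bad, logger)
	fmt.Println(err)

	// With the default UTF-8 validation scheme the same strategy loads fine.
	cfg, err := config.Load("otlp:\n  translation_strategy: NoTranslation\n", logger)
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.OTLPConfig.TranslationStrategy)
}
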
diff --git a/config/config_default_test.go b/config/config_default_test.go
index 31133f1e04..e5f43e1f50 100644
--- a/config/config_default_test.go
+++ b/config/config_default_test.go
@@ -18,8 +18,11 @@ package config
const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{
+ loaded: true,
+
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
+ OTLPConfig: DefaultOTLPConfig,
RuleFiles: []string{
"testdata/first.rules",
"testdata/rules/second.rules",
diff --git a/config/config_test.go b/config/config_test.go
index 3c4907a46c..0673748b98 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -15,7 +15,7 @@ package config
import (
"crypto/tls"
- "encoding/json"
+ "fmt"
"net/url"
"os"
"path/filepath"
@@ -23,10 +23,10 @@ import (
"time"
"github.com/alecthomas/units"
- "github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -50,6 +50,7 @@ import (
"github.com/prometheus/prometheus/discovery/ovhcloud"
"github.com/prometheus/prometheus/discovery/puppetdb"
"github.com/prometheus/prometheus/discovery/scaleway"
+ "github.com/prometheus/prometheus/discovery/stackit"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/discovery/triton"
"github.com/prometheus/prometheus/discovery/uyuni"
@@ -69,6 +70,10 @@ func mustParseURL(u string) *config.URL {
return &config.URL{URL: parsed}
}
+func boolPtr(b bool) *bool {
+ return &b
+}
+
const (
globBodySizeLimit = 15 * units.MiB
globSampleLimit = 1500
@@ -77,24 +82,29 @@ const (
globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200
globalGoGC = 42
+ globScrapeFailureLogFile = "testdata/fail.log"
)
var expectedConf = &Config{
+ loaded: true,
GlobalConfig: GlobalConfig{
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EvaluationInterval: model.Duration(30 * time.Second),
- QueryLogFile: "",
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EvaluationInterval: model.Duration(30 * time.Second),
+ QueryLogFile: "testdata/query.log",
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"),
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ AlwaysScrapeClassicHistograms: false,
+ ConvertClassicHistogramsToNHCB: false,
},
Runtime: RuntimeConfig{
@@ -134,7 +144,7 @@ var expectedConf = &Config{
},
},
FollowRedirects: true,
- EnableHTTP2: true,
+ EnableHTTP2: false,
},
},
{
@@ -150,18 +160,26 @@ var expectedConf = &Config{
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
FollowRedirects: true,
- EnableHTTP2: true,
+ EnableHTTP2: false,
},
Headers: map[string]string{"name": "value"},
},
},
+ OTLPConfig: OTLPConfig{
+ PromoteResourceAttributes: []string{
+ "k8s.cluster.name", "k8s.job.name", "k8s.namespace.name",
+ },
+ TranslationStrategy: UnderscoreEscapingWithSuffixes,
+ },
+
RemoteReadConfigs: []*RemoteReadConfig{
{
- URL: mustParseURL("http://remote1/read"),
- RemoteTimeout: model.Duration(1 * time.Minute),
- ReadRecent: true,
- Name: "default",
+ URL: mustParseURL("http://remote1/read"),
+ RemoteTimeout: model.Duration(1 * time.Minute),
+ ChunkedReadLimit: DefaultChunkedReadLimit,
+ ReadRecent: true,
+ Name: "default",
HTTPClientConfig: config.HTTPClientConfig{
FollowRedirects: true,
EnableHTTP2: false,
@@ -171,6 +189,7 @@ var expectedConf = &Config{
{
URL: mustParseURL("http://remote3/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
+ ChunkedReadLimit: DefaultChunkedReadLimit,
ReadRecent: false,
Name: "read_special",
RequiredMatchers: model.LabelSet{"job": "special"},
@@ -190,18 +209,24 @@ var expectedConf = &Config{
{
JobName: "prometheus",
- HonorLabels: true,
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorLabels: true,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFallbackProtocol: PrometheusText0_0_4,
+ ScrapeFailureLogFile: "testdata/fail_prom.log",
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -216,6 +241,15 @@ var expectedConf = &Config{
TLSConfig: config.TLSConfig{
MinVersion: config.TLSVersion(tls.VersionTLS10),
},
+ HTTPHeaders: &config.Headers{
+ Headers: map[string]config.Header{
+ "foo": {
+ Values: []string{"foobar"},
+ Secrets: []config.Secret{"bar", "foo"},
+ Files: []string{filepath.FromSlash("testdata/valid_password_file")},
+ },
+ },
+ },
},
ServiceDiscoveryConfigs: discovery.Configs{
@@ -294,17 +328,22 @@ var expectedConf = &Config{
{
JobName: "service-x",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(50 * time.Second),
- ScrapeTimeout: model.Duration(5 * time.Second),
- EnableCompression: true,
- BodySizeLimit: 10 * units.MiB,
- SampleLimit: 1000,
- TargetLimit: 35,
- LabelLimit: 35,
- LabelNameLengthLimit: 210,
- LabelValueLengthLimit: 210,
- ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4},
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(50 * time.Second),
+ ScrapeTimeout: model.Duration(5 * time.Second),
+ EnableCompression: true,
+ BodySizeLimit: 10 * units.MiB,
+ SampleLimit: 1000,
+ TargetLimit: 35,
+ LabelLimit: 35,
+ LabelNameLengthLimit: 210,
+ LabelValueLengthLimit: 210,
+ ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4},
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
HTTPClientConfig: config.HTTPClientConfig{
BasicAuth: &config.BasicAuth{
@@ -391,17 +430,22 @@ var expectedConf = &Config{
{
JobName: "service-y",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -446,17 +490,22 @@ var expectedConf = &Config{
{
JobName: "service-z",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: model.Duration(10 * time.Second),
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: model.Duration(10 * time.Second),
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: "/metrics",
Scheme: "http",
@@ -479,17 +528,22 @@ var expectedConf = &Config{
{
JobName: "service-kubernetes",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -518,17 +572,22 @@ var expectedConf = &Config{
{
JobName: "service-kubernetes-namespaces",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -557,17 +616,22 @@ var expectedConf = &Config{
{
JobName: "service-kuma",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -586,17 +650,22 @@ var expectedConf = &Config{
{
JobName: "service-marathon",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -623,17 +692,22 @@ var expectedConf = &Config{
{
JobName: "service-nomad",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -657,17 +731,22 @@ var expectedConf = &Config{
{
JobName: "service-ec2",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -698,17 +777,22 @@ var expectedConf = &Config{
{
JobName: "service-lightsail",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -729,17 +813,22 @@ var expectedConf = &Config{
{
JobName: "service-azure",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -763,17 +852,22 @@ var expectedConf = &Config{
{
JobName: "service-nerve",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -790,17 +884,22 @@ var expectedConf = &Config{
{
JobName: "0123service-xxx",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -820,17 +919,22 @@ var expectedConf = &Config{
{
JobName: "badfederation",
- HonorTimestamps: false,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: false,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
@@ -850,17 +954,22 @@ var expectedConf = &Config{
{
JobName: "測試",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -880,17 +989,22 @@ var expectedConf = &Config{
{
JobName: "httpsd",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -907,17 +1021,22 @@ var expectedConf = &Config{
{
JobName: "service-triton",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -942,17 +1061,22 @@ var expectedConf = &Config{
{
JobName: "digitalocean-droplets",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -976,17 +1100,22 @@ var expectedConf = &Config{
{
JobName: "docker",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1007,17 +1136,22 @@ var expectedConf = &Config{
{
JobName: "dockerswarm",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1037,17 +1171,22 @@ var expectedConf = &Config{
{
JobName: "service-openstack",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1071,17 +1210,22 @@ var expectedConf = &Config{
{
JobName: "service-puppetdb",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1107,18 +1251,23 @@ var expectedConf = &Config{
},
},
{
- JobName: "hetzner",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ JobName: "hetzner",
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1164,17 +1313,22 @@ var expectedConf = &Config{
{
JobName: "service-eureka",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1191,17 +1345,22 @@ var expectedConf = &Config{
{
JobName: "ovhcloud",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@@ -1229,17 +1388,22 @@ var expectedConf = &Config{
{
JobName: "scaleway",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@@ -1273,17 +1437,22 @@ var expectedConf = &Config{
{
JobName: "linode-instances",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1305,20 +1474,64 @@ var expectedConf = &Config{
},
},
},
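+ // Expected scrape config for the new STACKIT service discovery job; the values below are assumed to mirror the stackit_sd_configs entry in testdata/conf.good.yml.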
+ {
+ JobName: "stackit-servers",
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
+
+ MetricsPath: DefaultScrapeConfig.MetricsPath,
+ Scheme: DefaultScrapeConfig.Scheme,
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+ ServiceDiscoveryConfigs: discovery.Configs{
+ &stackit.SDConfig{
+ Project: "11111111-1111-1111-1111-111111111111",
+ Region: "eu01",
+ HTTPClientConfig: config.HTTPClientConfig{
+ Authorization: &config.Authorization{
+ Type: "Bearer",
+ Credentials: "abcdef",
+ },
+ FollowRedirects: true,
+ EnableHTTP2: true,
+ },
+ Port: 80,
+ RefreshInterval: model.Duration(60 * time.Second),
+ },
+ },
+ },
{
JobName: "uyuni",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@@ -1336,18 +1549,24 @@ var expectedConf = &Config{
},
},
{
- JobName: "ionos",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ JobName: "ionos",
+
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1369,17 +1588,22 @@ var expectedConf = &Config{
{
JobName: "vultr",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- EnableCompression: true,
- BodySizeLimit: globBodySizeLimit,
- SampleLimit: globSampleLimit,
- TargetLimit: globTargetLimit,
- LabelLimit: globLabelLimit,
- LabelNameLengthLimit: globLabelNameLengthLimit,
- LabelValueLengthLimit: globLabelValueLengthLimit,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ EnableCompression: true,
+ BodySizeLimit: globBodySizeLimit,
+ SampleLimit: globSampleLimit,
+ TargetLimit: globTargetLimit,
+ LabelLimit: globLabelLimit,
+ LabelNameLengthLimit: globLabelNameLengthLimit,
+ LabelValueLengthLimit: globLabelValueLengthLimit,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ ScrapeFailureLogFile: globScrapeFailureLogFile,
+ MetricNameValidationScheme: DefaultGlobalConfig.MetricNameValidationScheme,
+ MetricNameEscapingScheme: DefaultGlobalConfig.MetricNameEscapingScheme,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -1444,46 +1668,295 @@ var expectedConf = &Config{
},
}
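+// Assumes testdata/config_with_no_longer_supported_am_api_config.yml requests a dropped Alertmanager api_version (presumably v1); loading such a config must now fail.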
+func TestYAMLNotLongerSupportedAMApi(t *testing.T) {
+ _, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger())
+ require.Error(t, err)
+}
+
func TestYAMLRoundtrip(t *testing.T) {
- want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger())
+ want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
-
require.NoError(t, err)
- got := &Config{}
- require.NoError(t, yaml.UnmarshalStrict(out, got))
+
+ got, err := Load(string(out), promslog.NewNopLogger())
+ require.NoError(t, err)
require.Equal(t, want, got)
}
func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
- want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger())
+ want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
-
require.NoError(t, err)
- got := &Config{}
- require.NoError(t, yaml.UnmarshalStrict(out, got))
+
+ got, err := Load(string(out), promslog.NewNopLogger())
+ require.NoError(t, err)
require.True(t, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
+func TestOTLPSanitizeResourceAttributes(t *testing.T) {
+ t.Run("good config - default resource attributes", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_default_resource_attributes.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+
+ require.False(t, got.OTLPConfig.PromoteAllResourceAttributes)
+ require.Empty(t, got.OTLPConfig.IgnoreResourceAttributes)
+ require.Empty(t, got.OTLPConfig.PromoteResourceAttributes)
+ })
+
+ t.Run("good config - promote resource attributes", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_promote_resource_attributes.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+
+ require.False(t, got.OTLPConfig.PromoteAllResourceAttributes)
+ require.Empty(t, got.OTLPConfig.IgnoreResourceAttributes)
+ require.Equal(t, []string{"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"}, got.OTLPConfig.PromoteResourceAttributes)
+ })
+
+ t.Run("bad config - promote resource attributes", func(t *testing.T) {
+ _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_promote_resource_attributes.bad.yml"), false, promslog.NewNopLogger())
+ require.ErrorContains(t, err, `invalid 'promote_resource_attributes'`)
+ require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`)
+ require.ErrorContains(t, err, `empty promoted OTel resource attribute`)
+ })
+
+ t.Run("good config - promote all resource attributes", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes_promote_all.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+ require.True(t, got.OTLPConfig.PromoteAllResourceAttributes)
+ require.Empty(t, got.OTLPConfig.PromoteResourceAttributes)
+ require.Empty(t, got.OTLPConfig.IgnoreResourceAttributes)
+ })
+
+ t.Run("good config - ignore resource attributes", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_ignore_resource_attributes.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+ require.True(t, got.OTLPConfig.PromoteAllResourceAttributes)
+ require.Empty(t, got.OTLPConfig.PromoteResourceAttributes)
+ require.Equal(t, []string{"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"}, got.OTLPConfig.IgnoreResourceAttributes)
+ })
+
+ t.Run("bad config - ignore resource attributes", func(t *testing.T) {
+ _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_ignore_resource_attributes.bad.yml"), false, promslog.NewNopLogger())
+ require.ErrorContains(t, err, `invalid 'ignore_resource_attributes'`)
+ require.ErrorContains(t, err, `duplicated ignored OTel resource attribute "k8s.job.name"`)
+ require.ErrorContains(t, err, `empty ignored OTel resource attribute`)
+ })
+
+ t.Run("bad config - conflict between promote all and promote specific resource attributes", func(t *testing.T) {
+ _, err := LoadFile(filepath.Join("testdata", "otlp_promote_all_resource_attributes.bad.yml"), false, promslog.NewNopLogger())
+ require.ErrorContains(t, err, `'promote_all_resource_attributes' and 'promote_resource_attributes' cannot be configured simultaneously`)
+ })
+
+ t.Run("bad config - configuring ignoring of resource attributes without also enabling promotion of all resource attributes", func(t *testing.T) {
+ _, err := LoadFile(filepath.Join("testdata", "otlp_ignore_resource_attributes_without_promote_all.bad.yml"), false, promslog.NewNopLogger())
+ require.ErrorContains(t, err, `'ignore_resource_attributes' cannot be configured unless 'promote_all_resource_attributes' is true`)
+ })
+}
+
+func TestOTLPAllowServiceNameInTargetInfo(t *testing.T) {
+ t.Run("good config", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_allow_keep_identifying_resource_attributes.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+
+ require.True(t, got.OTLPConfig.KeepIdentifyingResourceAttributes)
+ })
+}
+
+func TestOTLPConvertHistogramsToNHCB(t *testing.T) {
+ t.Run("good config", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_convert_histograms_to_nhcb.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+
+ require.True(t, got.OTLPConfig.ConvertHistogramsToNHCB)
+ })
+}
+
+func TestOTLPPromoteScopeMetadata(t *testing.T) {
+ t.Run("good config", func(t *testing.T) {
+ want, err := LoadFile(filepath.Join("testdata", "otlp_promote_scope_metadata.good.yml"), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+ require.NoError(t, err)
+ var got Config
+ require.NoError(t, yaml.UnmarshalStrict(out, &got))
+
+ require.True(t, got.OTLPConfig.PromoteScopeMetadata)
+ })
+}
+
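+// Translation strategies exercised below: NoUTF8EscapingWithSuffixes, NoTranslation, and the default UnderscoreEscapingWithSuffixes; the first two are rejected when UTF-8 metric names are disabled.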
+func TestOTLPAllowUTF8(t *testing.T) {
+ t.Run("good config - NoUTF8EscapingWithSuffixes", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml")
+ verify := func(t *testing.T, conf *Config, err error) {
+ t.Helper()
+ require.NoError(t, err)
+ require.Equal(t, NoUTF8EscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ conf, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ conf, err := Load(string(content), promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ })
+
+ t.Run("incompatible config - NoUTF8EscapingWithSuffixes", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml")
+ verify := func(t *testing.T, err error) {
+ t.Helper()
+ require.ErrorContains(t, err, `OTLP translation strategy "NoUTF8EscapingWithSuffixes" is not allowed when UTF8 is disabled`)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ _, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ _, err = Load(string(content), promslog.NewNopLogger())
+ t.Log("err", err)
+ verify(t, err)
+ })
+ })
+
+ t.Run("good config - NoTranslation", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_no_translation.good.yml")
+ verify := func(t *testing.T, conf *Config, err error) {
+ t.Helper()
+ require.NoError(t, err)
+ require.Equal(t, NoTranslation, conf.OTLPConfig.TranslationStrategy)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ conf, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ conf, err := Load(string(content), promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ })
+
+ t.Run("incompatible config - NoTranslation", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_no_translation.incompatible.yml")
+ verify := func(t *testing.T, err error) {
+ t.Helper()
+ require.ErrorContains(t, err, `OTLP translation strategy "NoTranslation" is not allowed when UTF8 is disabled`)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ _, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ _, err = Load(string(content), promslog.NewNopLogger())
+ t.Log("err", err)
+ verify(t, err)
+ })
+ })
+
+ t.Run("bad config", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_allow_utf8.bad.yml")
+ verify := func(t *testing.T, err error) {
+ t.Helper()
+ require.ErrorContains(t, err, `unsupported OTLP translation strategy "Invalid"`)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ _, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ _, err = Load(string(content), promslog.NewNopLogger())
+ verify(t, err)
+ })
+ })
+
+ t.Run("good config - missing otlp config uses default", func(t *testing.T) {
+ fpath := filepath.Join("testdata", "otlp_empty.yml")
+ verify := func(t *testing.T, conf *Config, err error) {
+ t.Helper()
+ require.NoError(t, err)
+ require.Equal(t, UnderscoreEscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy)
+ }
+
+ t.Run("LoadFile", func(t *testing.T) {
+ conf, err := LoadFile(fpath, false, promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ t.Run("Load", func(t *testing.T) {
+ content, err := os.ReadFile(fpath)
+ require.NoError(t, err)
+ conf, err := Load(string(content), promslog.NewNopLogger())
+ verify(t, conf, err)
+ })
+ })
+}
+
func TestLoadConfig(t *testing.T) {
// Parse a valid file that sets a global scrape timeout. This tests whether parsing
// an overwritten default field in the global config permanently changes the default.
- _, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger())
+ _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
- c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
+ c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
+
require.NoError(t, err)
require.Equal(t, expectedConf, c)
}
func TestScrapeIntervalLarger(t *testing.T) {
- c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
+ c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
@@ -1493,7 +1966,7 @@ func TestScrapeIntervalLarger(t *testing.T) {
// YAML marshaling must not reveal authentication credentials.
func TestElideSecrets(t *testing.T) {
- c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
+ c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
secretRe := regexp.MustCompile(`\\u003csecret\\u003e|<secret>`)
@@ -1503,38 +1976,38 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
- require.Len(t, matches, 22, "wrong number of secret matches found")
+ require.Len(t, matches, 25, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) {
// Parse a valid file that sets rule files with an absolute path
- c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger())
+ c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, ruleFilesExpectedConf, c)
}
func TestKubernetesEmptyAPIServer(t *testing.T) {
- _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger())
+ _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesWithKubeConfig(t *testing.T) {
- _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger())
+ _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesSelectors(t *testing.T) {
- _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger())
+ _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
- _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
- _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
- _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
- _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
@@ -1556,11 +2029,7 @@ var expectedErrors = []struct {
},
{
filename: "labelname.bad.yml",
- errMsg: `"not$allowed" is not a valid label name`,
- },
- {
- filename: "labelname2.bad.yml",
- errMsg: `"not:allowed" is not a valid label name`,
+ errMsg: `"\xff" is not a valid label name`,
},
{
filename: "labelvalue.bad.yml",
@@ -1632,16 +2101,12 @@ var expectedErrors = []struct {
},
{
filename: "labelmap.bad.yml",
- errMsg: "\"l-$1\" is invalid 'replacement' for labelmap action",
+ errMsg: "!!binary value contains invalid base64 data",
},
{
filename: "lowercase.bad.yml",
errMsg: "relabel configuration for lowercase action requires 'target_label' value",
},
- {
- filename: "lowercase2.bad.yml",
- errMsg: "\"42lab\" is invalid 'target_label' for lowercase action",
- },
{
filename: "lowercase3.bad.yml",
errMsg: "'replacement' can not be set for lowercase action",
@@ -1650,10 +2115,6 @@ var expectedErrors = []struct {
filename: "uppercase.bad.yml",
errMsg: "relabel configuration for uppercase action requires 'target_label' value",
},
- {
- filename: "uppercase2.bad.yml",
- errMsg: "\"42lab\" is invalid 'target_label' for uppercase action",
- },
{
filename: "uppercase3.bad.yml",
errMsg: "'replacement' can not be set for uppercase action",
@@ -1800,7 +2261,7 @@ var expectedErrors = []struct {
},
{
filename: "remote_write_authorization_header.bad.yml",
- errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter`,
+ errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter`,
},
{
filename: "remote_write_wrong_msg.bad.yml",
@@ -2004,31 +2465,38 @@ var expectedErrors = []struct {
},
{
filename: "scrape_config_files_scrape_protocols.bad.yml",
- errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`,
+ errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_scrape_protocols2.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`,
},
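+ // New failure cases: invalid fallback_scrape_protocol values, conflicting UTF-8 metric name settings, and a malformed STACKIT endpoint.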
+ {
+ filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml",
+ errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`,
+ },
+ {
+ filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml",
+ errMsg: `unmarshal errors`,
+ },
+ {
+ filename: "scrape_config_utf8_conflicting.bad.yml",
+ errMsg: `utf8 metric names requested but validation scheme is not set to UTF8`,
+ },
+ {
+ filename: "stackit_endpoint.bad.yml",
+ errMsg: "invalid endpoint",
+ },
}
func TestBadConfigs(t *testing.T) {
for _, ee := range expectedErrors {
- _, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger())
- require.Error(t, err, "%s", ee.filename)
- require.Contains(t, err.Error(), ee.errMsg,
+ _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger())
+ require.ErrorContains(t, err, ee.errMsg,
"Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err)
}
}
-func TestBadStaticConfigsJSON(t *testing.T) {
- content, err := os.ReadFile("testdata/static_config.bad.json")
- require.NoError(t, err)
- var tg targetgroup.Group
- err = json.Unmarshal(content, &tg)
- require.Error(t, err)
-}
-
func TestBadStaticConfigsYML(t *testing.T) {
content, err := os.ReadFile("testdata/static_config.bad.yml")
require.NoError(t, err)
@@ -2038,48 +2506,46 @@ func TestBadStaticConfigsYML(t *testing.T) {
}
func TestEmptyConfig(t *testing.T) {
- c, err := Load("", false, log.NewNopLogger())
+ c, err := Load("", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
+ exp.loaded = true
require.Equal(t, exp, *c)
+ require.Equal(t, 75, c.Runtime.GoGC)
}
func TestExpandExternalLabels(t *testing.T) {
// Clean up any TEST env variable that could exist on the system.
os.Setenv("TEST", "")
- c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger())
- require.NoError(t, err)
- testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels)
-
- c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
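+ // Env-var expansion of external labels now always applies (the separate expandExternalLabels flag was removed from LoadFile), so only the expanded form is asserted.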
+ c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
os.Setenv("TEST", "TestValue")
- c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
+ c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
}
func TestAgentMode(t *testing.T) {
- _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger())
+ _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
- _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
- _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field rule_files is not allowed in agent mode")
- _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger())
+ _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field remote_read is not allowed in agent mode")
- c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
+ c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Empty(t, c.RemoteWriteConfigs)
- c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
+ c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.RemoteWriteConfigs, 1)
require.Equal(
@@ -2090,21 +2556,33 @@ func TestAgentMode(t *testing.T) {
}
func TestEmptyGlobalBlock(t *testing.T) {
- c, err := Load("global:\n", false, log.NewNopLogger())
+ c, err := Load("global:\n", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
- exp.Runtime = DefaultRuntimeConfig
+ exp.loaded = true
require.Equal(t, exp, *c)
}
+// ScrapeConfigOptions contains options for creating a scrape config.
+type ScrapeConfigOptions struct {
+ JobName string
+ ScrapeInterval model.Duration
+ ScrapeTimeout model.Duration
+ AlwaysScrapeClassicHistograms bool
+ ConvertClassicHistToNHCB bool
+}
+
func TestGetScrapeConfigs(t *testing.T) {
- sc := func(jobName string, scrapeInterval, scrapeTimeout model.Duration) *ScrapeConfig {
+ // Helper function to create a scrape config with the given options.
+ sc := func(opts ScrapeConfigOptions) *ScrapeConfig {
return &ScrapeConfig{
- JobName: jobName,
- HonorTimestamps: true,
- ScrapeInterval: scrapeInterval,
- ScrapeTimeout: scrapeTimeout,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ JobName: opts.JobName,
+ HonorTimestamps: true,
+ ScrapeInterval: opts.ScrapeInterval,
+ ScrapeTimeout: opts.ScrapeTimeout,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ MetricNameValidationScheme: model.UTF8Validation,
+ MetricNameEscapingScheme: model.AllowUTF8,
MetricsPath: "/metrics",
Scheme: "http",
@@ -2122,6 +2600,8 @@ func TestGetScrapeConfigs(t *testing.T) {
},
},
},
+ AlwaysScrapeClassicHistograms: boolPtr(opts.AlwaysScrapeClassicHistograms),
+ ConvertClassicHistogramsToNHCB: boolPtr(opts.ConvertClassicHistToNHCB),
}
}
@@ -2134,33 +2614,37 @@ func TestGetScrapeConfigs(t *testing.T) {
{
name: "An included config file should be a valid global config.",
configFile: "testdata/scrape_config_files.good.yml",
- expectedResult: []*ScrapeConfig{sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second))},
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false})},
},
{
- name: "An global config that only include a scrape config file.",
+ name: "A global config that only include a scrape config file.",
configFile: "testdata/scrape_config_files_only.good.yml",
- expectedResult: []*ScrapeConfig{sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second))},
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false})},
},
{
- name: "An global config that combine scrape config files and scrape configs.",
+ name: "A global config that combine scrape config files and scrape configs.",
configFile: "testdata/scrape_config_files_combined.good.yml",
expectedResult: []*ScrapeConfig{
- sc("node", model.Duration(60*time.Second), model.Duration(10*time.Second)),
- sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second)),
- sc("alertmanager", model.Duration(60*time.Second), model.Duration(10*time.Second)),
+ sc(ScrapeConfigOptions{JobName: "node", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false}),
+ sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false}),
+ sc(ScrapeConfigOptions{JobName: "alertmanager", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false}),
},
},
{
- name: "An global config that includes a scrape config file with globs",
+ name: "A global config that includes a scrape config file with globs",
configFile: "testdata/scrape_config_files_glob.good.yml",
expectedResult: []*ScrapeConfig{
{
JobName: "prometheus",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(60 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(60 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ MetricNameValidationScheme: model.UTF8Validation,
+ MetricNameEscapingScheme: model.AllowUTF8,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@@ -2190,10 +2674,14 @@ func TestGetScrapeConfigs(t *testing.T) {
{
JobName: "node",
- HonorTimestamps: true,
- ScrapeInterval: model.Duration(15 * time.Second),
- ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
- ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ HonorTimestamps: true,
+ ScrapeInterval: model.Duration(15 * time.Second),
+ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
+ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
+ MetricNameValidationScheme: model.UTF8Validation,
+ MetricNameEscapingScheme: model.AllowUTF8,
+ AlwaysScrapeClassicHistograms: boolPtr(false),
+ ConvertClassicHistogramsToNHCB: boolPtr(false),
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
@@ -2227,25 +2715,60 @@ func TestGetScrapeConfigs(t *testing.T) {
},
},
{
- name: "An global config that includes twice the same scrape configs.",
+ name: "A global config that includes twice the same scrape configs.",
configFile: "testdata/scrape_config_files_double_import.bad.yml",
expectedError: `found multiple scrape configs with job name "prometheus"`,
},
{
- name: "An global config that includes a scrape config identical to a scrape config in the main file.",
+ name: "A global config that includes a scrape config identical to a scrape config in the main file.",
configFile: "testdata/scrape_config_files_duplicate.bad.yml",
expectedError: `found multiple scrape configs with job name "prometheus"`,
},
{
- name: "An global config that includes a scrape config file with errors.",
+ name: "A global config that includes a scrape config file with errors.",
configFile: "testdata/scrape_config_files_global.bad.yml",
expectedError: `scrape timeout greater than scrape interval for scrape config with job name "prometheus"`,
},
+ {
+ name: "A global config that enables convert classic histograms to nhcb.",
+ configFile: "testdata/global_convert_classic_hist_to_nhcb.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: true})},
+ },
+ {
+ name: "A global config that enables convert classic histograms to nhcb and scrape config that disables the conversion",
+ configFile: "testdata/local_disable_convert_classic_hist_to_nhcb.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false})},
+ },
+ {
+ name: "A global config that disables convert classic histograms to nhcb and scrape config that enables the conversion",
+ configFile: "testdata/local_convert_classic_hist_to_nhcb.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: true})},
+ },
+ {
+ name: "A global config that enables always scrape classic histograms",
+ configFile: "testdata/global_enable_always_scrape_classic_hist.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: true, ConvertClassicHistToNHCB: false})},
+ },
+ {
+ name: "A global config that disables always scrape classic histograms",
+ configFile: "testdata/global_disable_always_scrape_classic_hist.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false})},
+ },
+ {
+ name: "A global config that disables always scrape classic histograms and scrape config that enables it",
+ configFile: "testdata/local_enable_always_scrape_classic_hist.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: true, ConvertClassicHistToNHCB: false})},
+ },
+ {
+ name: "A global config that enables always scrape classic histograms and scrape config that disables it",
+ configFile: "testdata/local_disable_always_scrape_classic_hist.good.yml",
+ expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), AlwaysScrapeClassicHistograms: false, ConvertClassicHistToNHCB: false})},
+ },
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger())
+ c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger())
require.NoError(t, err)
scfgs, err := c.GetScrapeConfigs()
@@ -2263,7 +2786,7 @@ func kubernetesSDHostURL() config.URL {
}
func TestScrapeConfigDisableCompression(t *testing.T) {
- want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger())
+ want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@@ -2274,3 +2797,163 @@ func TestScrapeConfigDisableCompression(t *testing.T) {
require.False(t, got.ScrapeConfigs[0].EnableCompression)
}
+
+func TestScrapeConfigNameValidationSettings(t *testing.T) {
+ tests := []struct {
+ name string
+ inputFile string
+ expectScheme model.ValidationScheme
+ }{
+ {
+ name: "blank config implies default",
+ inputFile: "scrape_config_default_validation_mode",
+ expectScheme: model.UTF8Validation,
+ },
+ {
+ name: "global setting implies local settings",
+ inputFile: "scrape_config_global_validation_mode",
+ expectScheme: model.LegacyValidation,
+ },
+ {
+ name: "local setting",
+ inputFile: "scrape_config_local_validation_mode",
+ expectScheme: model.LegacyValidation,
+ },
+ {
+ name: "local setting overrides global setting",
+ inputFile: "scrape_config_local_global_validation_mode",
+ expectScheme: model.UTF8Validation,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+
+ require.NoError(t, err)
+ got := &Config{}
+ require.NoError(t, yaml.UnmarshalStrict(out, got))
+
+ require.Equal(t, tc.expectScheme, got.ScrapeConfigs[0].MetricNameValidationScheme)
+ })
+ }
+}
+
+func TestScrapeConfigNameEscapingSettings(t *testing.T) {
+ tests := []struct {
+ name string
+ inputFile string
+ expectValidationScheme model.ValidationScheme
+ expectEscapingScheme string
+ }{
+ {
+ name: "blank config implies default",
+ inputFile: "scrape_config_default_validation_mode",
+ expectValidationScheme: model.UTF8Validation,
+ expectEscapingScheme: "allow-utf-8",
+ },
+ {
+ name: "global setting implies local settings",
+ inputFile: "scrape_config_global_validation_mode",
+ expectValidationScheme: model.LegacyValidation,
+ expectEscapingScheme: "dots",
+ },
+ {
+ name: "local setting",
+ inputFile: "scrape_config_local_validation_mode",
+ expectValidationScheme: model.LegacyValidation,
+ expectEscapingScheme: "values",
+ },
+ {
+ name: "local setting overrides global setting",
+ inputFile: "scrape_config_local_global_validation_mode",
+ expectValidationScheme: model.UTF8Validation,
+ expectEscapingScheme: "dots",
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ out, err := yaml.Marshal(want)
+
+ require.NoError(t, err)
+ got := &Config{}
+ require.NoError(t, yaml.UnmarshalStrict(out, got))
+
+ require.Equal(t, tc.expectValidationScheme, got.ScrapeConfigs[0].MetricNameValidationScheme)
+ require.Equal(t, tc.expectEscapingScheme, got.ScrapeConfigs[0].MetricNameEscapingScheme)
+ })
+ }
+}
+
+func TestScrapeProtocolHeader(t *testing.T) {
+ tests := []struct {
+ name string
+ proto ScrapeProtocol
+ expectedValue string
+ }{
+ {
+ name: "blank",
+ proto: ScrapeProtocol(""),
+ expectedValue: "",
+ },
+ {
+ name: "invalid",
+ proto: ScrapeProtocol("invalid"),
+ expectedValue: "",
+ },
+ {
+ name: "prometheus protobuf",
+ proto: PrometheusProto,
+ expectedValue: "application/vnd.google.protobuf",
+ },
+ {
+ name: "prometheus text 0.0.4",
+ proto: PrometheusText0_0_4,
+ expectedValue: "text/plain",
+ },
+ {
+ name: "prometheus text 1.0.0",
+ proto: PrometheusText1_0_0,
+ expectedValue: "text/plain",
+ },
+ {
+ name: "openmetrics 0.0.1",
+ proto: OpenMetricsText0_0_1,
+ expectedValue: "application/openmetrics-text",
+ },
+ {
+ name: "openmetrics 1.0.0",
+ proto: OpenMetricsText1_0_0,
+ expectedValue: "application/openmetrics-text",
+ },
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ mediaType := tc.proto.HeaderMediaType()
+
+ require.Equal(t, tc.expectedValue, mediaType)
+ })
+ }
+}
+
+// Regression test against https://github.com/prometheus/prometheus/issues/15538
+func TestGetScrapeConfigs_Loaded(t *testing.T) {
+ t.Run("without load", func(t *testing.T) {
+ c := &Config{}
+ _, err := c.GetScrapeConfigs()
+ require.EqualError(t, err, "scrape config cannot be fetched, main config was not validated and loaded correctly; should not happen")
+ })
+ t.Run("with load", func(t *testing.T) {
+ c, err := Load("", promslog.NewNopLogger())
+ require.NoError(t, err)
+ _, err = c.GetScrapeConfigs()
+ require.NoError(t, err)
+ })
+}
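
The tests above exercise the new loader surface: LoadFile and Load now take a *slog.Logger built with promslog instead of a go-kit logger, and GetScrapeConfigs returns an error on a Config that was never validated and loaded (the regression covered by TestGetScrapeConfigs_Loaded). The following is a minimal sketch, not part of the diff, of the calling convention as the tests use it; the remaining bool argument is passed as false exactly as the tests do, and prometheus.yml is a placeholder path.

package main

import (
	"fmt"
	"log"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/config"
)

func main() {
	logger := promslog.NewNopLogger() // *slog.Logger, as the tests pass to LoadFile/Load

	// Load and validate the configuration file.
	cfg, err := config.LoadFile("prometheus.yml", false, logger)
	if err != nil {
		log.Fatal(err)
	}

	// GetScrapeConfigs only works on a loaded config; on a zero-value
	// Config it returns an error instead of silently returning nothing.
	scfgs, err := cfg.GetScrapeConfigs()
	if err != nil {
		log.Fatal(err)
	}
	for _, sc := range scfgs {
		fmt.Println(sc.JobName)
	}
}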
diff --git a/config/config_windows_test.go b/config/config_windows_test.go
index db4d46ef13..9d338b99e7 100644
--- a/config/config_windows_test.go
+++ b/config/config_windows_test.go
@@ -16,6 +16,8 @@ package config
const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{
+ loaded: true,
+
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{
diff --git a/config/reload.go b/config/reload.go
new file mode 100644
index 0000000000..cc0cc97158
--- /dev/null
+++ b/config/reload.go
@@ -0,0 +1,93 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "os"
+ "path/filepath"
+
+ promconfig "github.com/prometheus/common/config"
+ "gopkg.in/yaml.v2"
+)
+
+type ExternalFilesConfig struct {
+ RuleFiles []string `yaml:"rule_files"`
+ ScrapeConfigFiles []string `yaml:"scrape_config_files"`
+}
+
+// GenerateChecksum generates a checksum of the YAML file and the files it references.
+func GenerateChecksum(yamlFilePath string) (string, error) {
+ hash := sha256.New()
+
+ yamlContent, err := os.ReadFile(yamlFilePath)
+ if err != nil {
+ return "", fmt.Errorf("error reading YAML file: %w", err)
+ }
+ _, err = hash.Write(yamlContent)
+ if err != nil {
+ return "", fmt.Errorf("error writing YAML file to hash: %w", err)
+ }
+
+ var config ExternalFilesConfig
+ if err := yaml.Unmarshal(yamlContent, &config); err != nil {
+ return "", fmt.Errorf("error unmarshalling YAML: %w", err)
+ }
+
+ dir := filepath.Dir(yamlFilePath)
+
+ for i, file := range config.RuleFiles {
+ config.RuleFiles[i] = promconfig.JoinDir(dir, file)
+ }
+ for i, file := range config.ScrapeConfigFiles {
+ config.ScrapeConfigFiles[i] = promconfig.JoinDir(dir, file)
+ }
+
+ files := map[string][]string{
+ "r": config.RuleFiles, // "r" for rule files
+ "s": config.ScrapeConfigFiles, // "s" for scrape config files
+ }
+
+ for _, prefix := range []string{"r", "s"} {
+ for _, pattern := range files[prefix] {
+ matchingFiles, err := filepath.Glob(pattern)
+ if err != nil {
+ return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err)
+ }
+
+ for _, file := range matchingFiles {
+				// Write the prefix ("r" or "s") to the hash, followed by \0,
+				// then the file path and another \0.
+ _, err = hash.Write([]byte(prefix + "\x00" + file + "\x00"))
+ if err != nil {
+ return "", fmt.Errorf("error writing %q path to hash: %w", file, err)
+ }
+
+ // Read and hash the content of the file.
+ content, err := os.ReadFile(file)
+ if err != nil {
+ return "", fmt.Errorf("error reading file %s: %w", file, err)
+ }
+ _, err = hash.Write(append(content, []byte("\x00")...))
+ if err != nil {
+ return "", fmt.Errorf("error writing %q content to hash: %w", file, err)
+ }
+ }
+ }
+ }
+
+ return hex.EncodeToString(hash.Sum(nil)), nil
+}
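
GenerateChecksum above folds the main YAML file plus every rule_files and scrape_config_files entry it references into one SHA-256 digest, writing an "r" or "s" prefix and NUL separators before each referenced file so that edits, deletions, and re-orderings all change the result. Below is an illustrative sketch, not part of the diff, of how an auto-reload loop could poll that digest; the poll interval and the reload callback are placeholders rather than the actual wiring.

package main

import (
	"log/slog"
	"time"

	"github.com/prometheus/prometheus/config"
)

// watchConfig recomputes the checksum on every tick and calls reload when it
// differs from the last value that was successfully applied.
func watchConfig(path string, interval time.Duration, reload func() error, logger *slog.Logger) {
	last, err := config.GenerateChecksum(path)
	if err != nil {
		logger.Error("initial checksum failed", "err", err)
	}
	for range time.Tick(interval) {
		sum, err := config.GenerateChecksum(path)
		if err != nil {
			logger.Error("checksum failed", "err", err)
			continue
		}
		if sum == last {
			continue
		}
		if err := reload(); err != nil {
			logger.Error("reload failed", "err", err)
			continue
		}
		last = sum
	}
}

func main() {
	logger := slog.Default()
	watchConfig("prometheus.yml", 30*time.Second, func() error {
		logger.Info("config changed; reload here")
		return nil
	}, logger)
}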
diff --git a/config/reload_test.go b/config/reload_test.go
new file mode 100644
index 0000000000..3e77260ab3
--- /dev/null
+++ b/config/reload_test.go
@@ -0,0 +1,246 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestGenerateChecksum(t *testing.T) {
+ tmpDir := t.TempDir()
+
+ // Define paths for the temporary files.
+ yamlFilePath := filepath.Join(tmpDir, "test.yml")
+ ruleFile := "rule_file.yml"
+ ruleFilePath := filepath.Join(tmpDir, ruleFile)
+ scrapeConfigFile := "scrape_config.yml"
+ scrapeConfigFilePath := filepath.Join(tmpDir, scrapeConfigFile)
+
+ // Define initial and modified content for the files.
+ originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert"
+ modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert"
+
+ originalScrapeConfigContent := "scrape_configs:\n- job_name: example"
+ modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example"
+
+ testCases := []struct {
+ name string
+ ruleFilePath string
+ scrapeConfigFilePath string
+ }{
+ {
+ name: "Auto reload using relative path.",
+ ruleFilePath: ruleFile,
+ scrapeConfigFilePath: scrapeConfigFile,
+ },
+ {
+ name: "Auto reload using absolute path.",
+ ruleFilePath: ruleFilePath,
+ scrapeConfigFilePath: scrapeConfigFilePath,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ // Define YAML content referencing the rule and scrape config files.
+ yamlContent := fmt.Sprintf(`
+rule_files:
+ - %s
+scrape_config_files:
+ - %s
+`, tc.ruleFilePath, tc.scrapeConfigFilePath)
+
+ // Write initial content to files.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Generate the original checksum.
+ originalChecksum := calculateChecksum(t, yamlFilePath)
+
+ t.Run("Rule File Change", func(t *testing.T) {
+ // Modify the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(modifiedRuleContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Change", func(t *testing.T) {
+ // Modify the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(modifiedScrapeConfigContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Rule File Deletion", func(t *testing.T) {
+ // Delete the rule file.
+ require.NoError(t, os.Remove(ruleFilePath))
+
+ // Checksum should change.
+ deletedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, deletedChecksum)
+
+ // Restore the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Deletion", func(t *testing.T) {
+ // Delete the scrape config file.
+ require.NoError(t, os.Remove(scrapeConfigFilePath))
+
+ // Checksum should change.
+ deletedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, deletedChecksum)
+
+ // Restore the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Main File Change", func(t *testing.T) {
+ // Modify the main YAML file.
+ modifiedYamlContent := fmt.Sprintf(`
+global:
+ scrape_interval: 3s
+rule_files:
+ - %s
+scrape_config_files:
+ - %s
+`, tc.ruleFilePath, tc.scrapeConfigFilePath)
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the main YAML file.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Rule File Removed from YAML Config", func(t *testing.T) {
+ // Modify the YAML content to remove the rule file.
+ modifiedYamlContent := fmt.Sprintf(`
+scrape_config_files:
+ - %s
+`, tc.scrapeConfigFilePath)
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the YAML content.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Removed from YAML Config", func(t *testing.T) {
+ // Modify the YAML content to remove the scrape config file.
+ modifiedYamlContent := fmt.Sprintf(`
+rule_files:
+ - %s
+`, tc.ruleFilePath)
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the YAML content.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Empty Rule File", func(t *testing.T) {
+ // Write an empty rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(""), 0o644))
+
+ // Checksum should change.
+ emptyChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, emptyChecksum)
+
+ // Restore the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Empty Scrape Config File", func(t *testing.T) {
+ // Write an empty scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(""), 0o644))
+
+ // Checksum should change.
+ emptyChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, emptyChecksum)
+
+ // Restore the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+ })
+ }
+}
+
+// calculateChecksum generates a checksum for the given YAML file path.
+func calculateChecksum(t *testing.T, yamlFilePath string) string {
+ checksum, err := GenerateChecksum(yamlFilePath)
+ require.NoError(t, err)
+ require.NotEmpty(t, checksum)
+ return checksum
+}
diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml
index 0e0aa2bd5d..cbe80404bf 100644
--- a/config/testdata/conf.good.yml
+++ b/config/testdata/conf.good.yml
@@ -8,6 +8,8 @@ global:
label_limit: 30
label_name_length_limit: 200
label_value_length_limit: 200
+ query_log_file: query.log
+ scrape_failure_log_file: fail.log
# scrape_timeout is set to the global default (10s).
external_labels:
@@ -45,6 +47,9 @@ remote_write:
headers:
name: value
+otlp:
+ promote_resource_attributes: ["k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"]
+
remote_read:
- url: http://remote1/read
read_recent: true
@@ -69,6 +74,9 @@ scrape_configs:
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
+ fallback_scrape_protocol: PrometheusText0.0.4
+
+ scrape_failure_log_file: fail_prom.log
file_sd_configs:
- files:
- foo/*.slow.json
@@ -84,6 +92,12 @@ scrape_configs:
my: label
your: label
+ http_headers:
+ foo:
+ values: ["foobar"]
+ secrets: ["bar", "foo"]
+ files: ["valid_password_file"]
+
relabel_configs:
- source_labels: [job, __meta_dns_name]
regex: (.*)some-[regex]
@@ -403,6 +417,12 @@ scrape_configs:
- authorization:
credentials: abcdef
+ - job_name: stackit-servers
+ stackit_sd_configs:
+ - project: 11111111-1111-1111-1111-111111111111
+ authorization:
+ credentials: abcdef
+
- job_name: uyuni
uyuni_sd_configs:
- server: https://localhost:1234
diff --git a/config/testdata/config_with_deprecated_am_api_config.yml b/config/testdata/config_with_deprecated_am_api_config.yml
new file mode 100644
index 0000000000..ac89537ff1
--- /dev/null
+++ b/config/testdata/config_with_deprecated_am_api_config.yml
@@ -0,0 +1,7 @@
+alerting:
+ alertmanagers:
+ - scheme: http
+ api_version: v1
+ file_sd_configs:
+ - files:
+ - nonexistent_file.yml
diff --git a/config/testdata/global_convert_classic_hist_to_nhcb.good.yml b/config/testdata/global_convert_classic_hist_to_nhcb.good.yml
new file mode 100644
index 0000000000..c97b7597af
--- /dev/null
+++ b/config/testdata/global_convert_classic_hist_to_nhcb.good.yml
@@ -0,0 +1,6 @@
+global:
+ convert_classic_histograms_to_nhcb: true
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
diff --git a/config/testdata/global_disable_always_scrape_classic_hist.good.yml b/config/testdata/global_disable_always_scrape_classic_hist.good.yml
new file mode 100644
index 0000000000..de28f1357a
--- /dev/null
+++ b/config/testdata/global_disable_always_scrape_classic_hist.good.yml
@@ -0,0 +1,6 @@
+global:
+ always_scrape_classic_histograms: false
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
diff --git a/config/testdata/global_enable_always_scrape_classic_hist.good.yml b/config/testdata/global_enable_always_scrape_classic_hist.good.yml
new file mode 100644
index 0000000000..d42cf69cb6
--- /dev/null
+++ b/config/testdata/global_enable_always_scrape_classic_hist.good.yml
@@ -0,0 +1,6 @@
+global:
+ always_scrape_classic_histograms: true
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
diff --git a/config/testdata/jobname_dup.bad.yml b/config/testdata/jobname_dup.bad.yml
index 0265493c30..d03cb0cf97 100644
--- a/config/testdata/jobname_dup.bad.yml
+++ b/config/testdata/jobname_dup.bad.yml
@@ -1,4 +1,6 @@
# Two scrape configs with the same job names are not allowed.
+global:
+ metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
- job_name: service-x
diff --git a/config/testdata/labelmap.bad.yml b/config/testdata/labelmap.bad.yml
index 29d2653990..b8aa117acf 100644
--- a/config/testdata/labelmap.bad.yml
+++ b/config/testdata/labelmap.bad.yml
@@ -2,4 +2,4 @@ scrape_configs:
- job_name: prometheus
relabel_configs:
- action: labelmap
- replacement: l-$1
+ replacement: !!binary "/w==$1"
diff --git a/config/testdata/labelname.bad.yml b/config/testdata/labelname.bad.yml
index c06853a26b..0c9c5ef3b2 100644
--- a/config/testdata/labelname.bad.yml
+++ b/config/testdata/labelname.bad.yml
@@ -1,3 +1,3 @@
global:
external_labels:
- not$allowed: value
+ !!binary "/w==": value
diff --git a/config/testdata/labelname2.bad.yml b/config/testdata/labelname2.bad.yml
deleted file mode 100644
index 7afcd6bcfc..0000000000
--- a/config/testdata/labelname2.bad.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-global:
- external_labels:
- 'not:allowed': value
diff --git a/config/testdata/local_convert_classic_hist_to_nhcb.good.yml b/config/testdata/local_convert_classic_hist_to_nhcb.good.yml
new file mode 100644
index 0000000000..0fd31727eb
--- /dev/null
+++ b/config/testdata/local_convert_classic_hist_to_nhcb.good.yml
@@ -0,0 +1,7 @@
+global:
+ convert_classic_histograms_to_nhcb: false
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
+ convert_classic_histograms_to_nhcb: true
diff --git a/config/testdata/local_disable_always_scrape_classic_hist.good.yml b/config/testdata/local_disable_always_scrape_classic_hist.good.yml
new file mode 100644
index 0000000000..9e668340dc
--- /dev/null
+++ b/config/testdata/local_disable_always_scrape_classic_hist.good.yml
@@ -0,0 +1,7 @@
+global:
+ always_scrape_classic_histograms: true
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
+ always_scrape_classic_histograms: false
diff --git a/config/testdata/local_disable_convert_classic_hist_to_nhcb.good.yml b/config/testdata/local_disable_convert_classic_hist_to_nhcb.good.yml
new file mode 100644
index 0000000000..b41af7e0a5
--- /dev/null
+++ b/config/testdata/local_disable_convert_classic_hist_to_nhcb.good.yml
@@ -0,0 +1,7 @@
+global:
+ convert_classic_histograms_to_nhcb: true
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
+ convert_classic_histograms_to_nhcb: false
diff --git a/config/testdata/local_enable_always_scrape_classic_hist.good.yml b/config/testdata/local_enable_always_scrape_classic_hist.good.yml
new file mode 100644
index 0000000000..165be07754
--- /dev/null
+++ b/config/testdata/local_enable_always_scrape_classic_hist.good.yml
@@ -0,0 +1,7 @@
+global:
+ always_scrape_classic_histograms: false
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ['localhost:8080']
+ always_scrape_classic_histograms: true
diff --git a/config/testdata/lowercase.bad.yml b/config/testdata/lowercase.bad.yml
index 9bc9583341..6dd72e6476 100644
--- a/config/testdata/lowercase.bad.yml
+++ b/config/testdata/lowercase.bad.yml
@@ -1,3 +1,5 @@
+global:
+ metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
relabel_configs:
diff --git a/config/testdata/lowercase2.bad.yml b/config/testdata/lowercase2.bad.yml
deleted file mode 100644
index bde8862c66..0000000000
--- a/config/testdata/lowercase2.bad.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-scrape_configs:
- - job_name: prometheus
- relabel_configs:
- - action: lowercase
- source_labels: [__name__]
- target_label: 42lab
diff --git a/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml b/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml
new file mode 100644
index 0000000000..63151e2a77
--- /dev/null
+++ b/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ keep_identifying_resource_attributes: true
diff --git a/config/testdata/otlp_allow_utf8.bad.yml b/config/testdata/otlp_allow_utf8.bad.yml
new file mode 100644
index 0000000000..488e4b0558
--- /dev/null
+++ b/config/testdata/otlp_allow_utf8.bad.yml
@@ -0,0 +1,4 @@
+global:
+ metric_name_validation_scheme: legacy
+otlp:
+ translation_strategy: Invalid
diff --git a/config/testdata/otlp_allow_utf8.good.yml b/config/testdata/otlp_allow_utf8.good.yml
new file mode 100644
index 0000000000..f3069d2fdd
--- /dev/null
+++ b/config/testdata/otlp_allow_utf8.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ translation_strategy: NoUTF8EscapingWithSuffixes
diff --git a/config/testdata/otlp_allow_utf8.incompatible.yml b/config/testdata/otlp_allow_utf8.incompatible.yml
new file mode 100644
index 0000000000..2625c24131
--- /dev/null
+++ b/config/testdata/otlp_allow_utf8.incompatible.yml
@@ -0,0 +1,4 @@
+global:
+ metric_name_validation_scheme: legacy
+otlp:
+ translation_strategy: NoUTF8EscapingWithSuffixes
diff --git a/config/testdata/otlp_convert_histograms_to_nhcb.good.yml b/config/testdata/otlp_convert_histograms_to_nhcb.good.yml
new file mode 100644
index 0000000000..1462cafe9b
--- /dev/null
+++ b/config/testdata/otlp_convert_histograms_to_nhcb.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ convert_histograms_to_nhcb: true
diff --git a/config/testdata/otlp_empty.yml b/config/testdata/otlp_empty.yml
new file mode 100644
index 0000000000..7085e9246b
--- /dev/null
+++ b/config/testdata/otlp_empty.yml
@@ -0,0 +1 @@
+global:
diff --git a/config/testdata/otlp_ignore_resource_attributes_without_promote_all.bad.yml b/config/testdata/otlp_ignore_resource_attributes_without_promote_all.bad.yml
new file mode 100644
index 0000000000..be4ee60f2a
--- /dev/null
+++ b/config/testdata/otlp_ignore_resource_attributes_without_promote_all.bad.yml
@@ -0,0 +1,2 @@
+otlp:
+ ignore_resource_attributes: ["k8s.job.name"]
diff --git a/config/testdata/otlp_no_translation.good.yml b/config/testdata/otlp_no_translation.good.yml
new file mode 100644
index 0000000000..e5c4460842
--- /dev/null
+++ b/config/testdata/otlp_no_translation.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ translation_strategy: NoTranslation
diff --git a/config/testdata/otlp_no_translation.incompatible.yml b/config/testdata/otlp_no_translation.incompatible.yml
new file mode 100644
index 0000000000..33c5a756f5
--- /dev/null
+++ b/config/testdata/otlp_no_translation.incompatible.yml
@@ -0,0 +1,4 @@
+global:
+ metric_name_validation_scheme: legacy
+otlp:
+ translation_strategy: NoTranslation
diff --git a/config/testdata/otlp_promote_all_resource_attributes.bad.yml b/config/testdata/otlp_promote_all_resource_attributes.bad.yml
new file mode 100644
index 0000000000..2be54ec155
--- /dev/null
+++ b/config/testdata/otlp_promote_all_resource_attributes.bad.yml
@@ -0,0 +1,3 @@
+otlp:
+ promote_all_resource_attributes: true
+ promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name", "k8s.job.name"]
diff --git a/config/testdata/otlp_promote_scope_metadata.good.yml b/config/testdata/otlp_promote_scope_metadata.good.yml
new file mode 100644
index 0000000000..d88f657abd
--- /dev/null
+++ b/config/testdata/otlp_promote_scope_metadata.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ promote_scope_metadata: true
diff --git a/config/testdata/otlp_sanitize_default_resource_attributes.good.yml b/config/testdata/otlp_sanitize_default_resource_attributes.good.yml
new file mode 100644
index 0000000000..abdd98dc7a
--- /dev/null
+++ b/config/testdata/otlp_sanitize_default_resource_attributes.good.yml
@@ -0,0 +1 @@
+otlp:
diff --git a/config/testdata/otlp_sanitize_ignore_resource_attributes.bad.yml b/config/testdata/otlp_sanitize_ignore_resource_attributes.bad.yml
new file mode 100644
index 0000000000..57ce0efac0
--- /dev/null
+++ b/config/testdata/otlp_sanitize_ignore_resource_attributes.bad.yml
@@ -0,0 +1,3 @@
+otlp:
+ promote_all_resource_attributes: true
+ ignore_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name", "k8s.job.name", ""]
diff --git a/config/testdata/otlp_sanitize_ignore_resource_attributes.good.yml b/config/testdata/otlp_sanitize_ignore_resource_attributes.good.yml
new file mode 100644
index 0000000000..7a50fef405
--- /dev/null
+++ b/config/testdata/otlp_sanitize_ignore_resource_attributes.good.yml
@@ -0,0 +1,3 @@
+otlp:
+ promote_all_resource_attributes: true
+ ignore_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name"]
diff --git a/config/testdata/otlp_sanitize_promote_resource_attributes.bad.yml b/config/testdata/otlp_sanitize_promote_resource_attributes.bad.yml
new file mode 100644
index 0000000000..37ec5d1209
--- /dev/null
+++ b/config/testdata/otlp_sanitize_promote_resource_attributes.bad.yml
@@ -0,0 +1,2 @@
+otlp:
+ promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name", "k8s.job.name", ""]
diff --git a/config/testdata/otlp_sanitize_promote_resource_attributes.good.yml b/config/testdata/otlp_sanitize_promote_resource_attributes.good.yml
new file mode 100644
index 0000000000..67247e7743
--- /dev/null
+++ b/config/testdata/otlp_sanitize_promote_resource_attributes.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name"]
diff --git a/config/testdata/otlp_sanitize_resource_attributes_promote_all.good.yml b/config/testdata/otlp_sanitize_resource_attributes_promote_all.good.yml
new file mode 100644
index 0000000000..2c8360011b
--- /dev/null
+++ b/config/testdata/otlp_sanitize_resource_attributes_promote_all.good.yml
@@ -0,0 +1,2 @@
+otlp:
+ promote_all_resource_attributes: true
diff --git a/config/testdata/scrape_config_default_validation_mode.yml b/config/testdata/scrape_config_default_validation_mode.yml
new file mode 100644
index 0000000000..96680d6438
--- /dev/null
+++ b/config/testdata/scrape_config_default_validation_mode.yml
@@ -0,0 +1,2 @@
+scrape_configs:
+ - job_name: prometheus
diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml
new file mode 100644
index 0000000000..07cfe47594
--- /dev/null
+++ b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml
@@ -0,0 +1,5 @@
+scrape_configs:
+ - job_name: node
+ fallback_scrape_protocol: "prometheusproto"
+ static_configs:
+ - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml
new file mode 100644
index 0000000000..c5d133f9c4
--- /dev/null
+++ b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml
@@ -0,0 +1,5 @@
+scrape_configs:
+ - job_name: node
+ fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"]
+ static_configs:
+ - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_global_validation_mode.yml b/config/testdata/scrape_config_global_validation_mode.yml
new file mode 100644
index 0000000000..fb4baf7b07
--- /dev/null
+++ b/config/testdata/scrape_config_global_validation_mode.yml
@@ -0,0 +1,5 @@
+global:
+ metric_name_validation_scheme: legacy
+ metric_name_escaping_scheme: dots
+scrape_configs:
+ - job_name: prometheus
diff --git a/config/testdata/scrape_config_local_global_validation_mode.yml b/config/testdata/scrape_config_local_global_validation_mode.yml
new file mode 100644
index 0000000000..29cd2b4140
--- /dev/null
+++ b/config/testdata/scrape_config_local_global_validation_mode.yml
@@ -0,0 +1,7 @@
+global:
+ metric_name_validation_scheme: legacy
+ metric_name_escaping_scheme: values
+scrape_configs:
+ - job_name: prometheus
+ metric_name_validation_scheme: utf8
+ metric_name_escaping_scheme: dots
diff --git a/config/testdata/scrape_config_local_validation_mode.yml b/config/testdata/scrape_config_local_validation_mode.yml
new file mode 100644
index 0000000000..b4d1ff05df
--- /dev/null
+++ b/config/testdata/scrape_config_local_validation_mode.yml
@@ -0,0 +1,4 @@
+scrape_configs:
+ - job_name: prometheus
+ metric_name_validation_scheme: legacy
+ metric_name_escaping_scheme: values
diff --git a/config/testdata/scrape_config_utf8_conflicting.bad.yml b/config/testdata/scrape_config_utf8_conflicting.bad.yml
new file mode 100644
index 0000000000..3f1b8f87ac
--- /dev/null
+++ b/config/testdata/scrape_config_utf8_conflicting.bad.yml
@@ -0,0 +1,5 @@
+global:
+ metric_name_validation_scheme: legacy
+ metric_name_escaping_scheme: allow-utf-8
+scrape_configs:
+ - job_name: prometheus
diff --git a/config/testdata/stackit_endpoint.bad.yml b/config/testdata/stackit_endpoint.bad.yml
new file mode 100644
index 0000000000..ecb0fefe9e
--- /dev/null
+++ b/config/testdata/stackit_endpoint.bad.yml
@@ -0,0 +1,4 @@
+scrape_configs:
+ - job_name: stackit
+ stackit_sd_configs:
+ - endpoint: "://invalid"
diff --git a/config/testdata/static_config.bad.json b/config/testdata/static_config.bad.json
deleted file mode 100644
index 6050ed9c50..0000000000
--- a/config/testdata/static_config.bad.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
- "targets": ["1.2.3.4:9100"],
- "labels": {
- "some_valid_label": "foo",
- "oops:this-label-is-invalid": "bar"
- }
-}
diff --git a/config/testdata/static_config.bad.yml b/config/testdata/static_config.bad.yml
index 1d229ec5e7..7e9003dcbf 100644
--- a/config/testdata/static_config.bad.yml
+++ b/config/testdata/static_config.bad.yml
@@ -1,4 +1,4 @@
targets: ['1.2.3.4:9001', '1.2.3.5:9090']
labels:
valid_label: foo
- not:valid_label: bar
+ !!binary "/w==": bar
diff --git a/config/testdata/uppercase2.bad.yml b/config/testdata/uppercase2.bad.yml
deleted file mode 100644
index 330b9aceb6..0000000000
--- a/config/testdata/uppercase2.bad.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-scrape_configs:
- - job_name: prometheus
- relabel_configs:
- - action: uppercase
- source_labels: [__name__]
- target_label: 42lab
diff --git a/console_libraries/menu.lib b/console_libraries/menu.lib
deleted file mode 100644
index 199ebf9f48..0000000000
--- a/console_libraries/menu.lib
+++ /dev/null
@@ -1,82 +0,0 @@
-{{/* vim: set ft=html: */}}
-
-{{/* Navbar, should be passed . */}}
-{{ define "navbar" }}
-
-{{ end }}
-
-{{/* LHS menu, should be passed . */}}
-{{ define "menu" }}
-
-{{ end }}
-
-{{/* Helper, pass (args . path name) */}}
-{{ define "_menuItem" }}
-
-{{ end }}
-
diff --git a/console_libraries/prom.lib b/console_libraries/prom.lib
deleted file mode 100644
index d7d436f947..0000000000
--- a/console_libraries/prom.lib
+++ /dev/null
@@ -1,138 +0,0 @@
-{{/* vim: set ft=html: */}}
-{{/* Load Prometheus console library JS/CSS. Should go in */}}
-{{ define "prom_console_head" }}
-
-
-
-
-
-
-
-
-
-
-
-
-
-{{ end }}
-
-{{/* Top of all pages. */}}
-{{ define "head" -}}
-
-
-
-{{ template "prom_console_head" }}
-
-
-{{ template "navbar" . }}
-
-{{ template "menu" . }}
-{{ end }}
-
-{{ define "__prom_query_drilldown_noop" }}{{ . }}{{ end }}
-{{ define "humanize" }}{{ humanize . }}{{ end }}
-{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }}
-{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }}
-{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }}
-{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }}
-{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }}
-{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }}
-{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }}
-
-{{/* prom_query_drilldown (args expr suffix? renderTemplate?)
-Displays the result of the expression, with a link to /graph for it.
-
-renderTemplate is the name of the template to use to render the value.
-*/}}
-{{ define "prom_query_drilldown" }}
-{{ $expr := .arg0 }}{{ $suffix := (or .arg1 "") }}{{ $renderTemplate := (or .arg2 "__prom_query_drilldown_noop") }}
-{{ with query $expr }}{{tmpl $renderTemplate ( . | first | value )}}{{ $suffix }}{{ else }}-{{ end }}
-{{ end }}
-
-{{ define "prom_path" }}/consoles/{{ .Path }}?{{ range $param, $value := .Params }}{{ $param }}={{ $value }}&{{ end }}{{ end }}"
-
-{{ define "prom_right_table_head" }}
-
-
-{{ end }}
-{{ define "prom_right_table_tail" }}
-
-
-{{ end }}
-
-{{/* RHS table head, pass job name. Should be used after prom_right_table_head. */}}
-{{ define "prom_right_table_job_head" }}
-
- | {{ . }} |
- {{ template "prom_query_drilldown" (args (printf "sum(up{job='%s'})" .)) }} / {{ template "prom_query_drilldown" (args (printf "count(up{job='%s'})" .)) }} |
-
-
- | CPU |
- {{ template "prom_query_drilldown" (args (printf "avg by(job)(irate(process_cpu_seconds_total{job='%s'}[5m]))" .) "s/s" "humanizeNoSmallPrefix") }} |
-
-
- | Memory |
- {{ template "prom_query_drilldown" (args (printf "avg by(job)(process_resident_memory_bytes{job='%s'})" .) "B" "humanize1024") }} |
-
-{{ end }}
-
-
-{{ define "prom_content_head" }}
-
-
-{{ template "prom_graph_timecontrol" . }}
-{{ end }}
-{{ define "prom_content_tail" }}
-
-
-{{ end }}
-
-{{ define "prom_graph_timecontrol" }}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{{ end }}
-
-{{/* Bottom of all pages. */}}
-{{ define "tail" }}
-
-
-{{ end }}
diff --git a/consoles/index.html.example b/consoles/index.html.example
deleted file mode 100644
index c725d30dea..0000000000
--- a/consoles/index.html.example
+++ /dev/null
@@ -1,28 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
-Overview
-These are example consoles for Prometheus.
-
-These consoles expect exporters to have the following job labels:
-
-
- | Exporter |
- Job label |
-
-
- | Node Exporter |
- node |
-
-
- | Prometheus |
- prometheus |
-
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/node-cpu.html b/consoles/node-cpu.html
deleted file mode 100644
index 284ad738f2..0000000000
--- a/consoles/node-cpu.html
+++ /dev/null
@@ -1,60 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-
- | CPU(s): {{ template "prom_query_drilldown" (args (printf "scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance)) }} |
-
-{{ range printf "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='%s'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance .Params.instance | query | sortByLabel "mode" }}
-
- | {{ .Labels.mode | title }} CPU |
- {{ .Value | printf "%.1f" }}% |
-
-{{ end }}
- | Misc |
-
- | Processes Running |
- {{ template "prom_query_drilldown" (args (printf "node_procs_running{job='node',instance='%s'}" .Params.instance) "" "humanize") }} |
-
-
- | Processes Blocked |
- {{ template "prom_query_drilldown" (args (printf "node_procs_blocked{job='node',instance='%s'}" .Params.instance) "" "humanize") }} |
-
-
- | Forks |
- {{ template "prom_query_drilldown" (args (printf "irate(node_forks_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} |
-
-
- | Context Switches |
- {{ template "prom_query_drilldown" (args (printf "irate(node_context_switches_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} |
-
-
- | Interrupts |
- {{ template "prom_query_drilldown" (args (printf "irate(node_intr_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} |
-
-
- | 1m Loadavg |
- {{ template "prom_query_drilldown" (args (printf "node_load1{job='node',instance='%s'}" .Params.instance)) }} |
-
-
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
- Node CPU - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}
-
- CPU Usage
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/node-disk.html b/consoles/node-disk.html
deleted file mode 100644
index ffff41b797..0000000000
--- a/consoles/node-disk.html
+++ /dev/null
@@ -1,78 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-
- | Disks |
-
-{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
- {{ .Labels.device }} |
-
- | Utilization |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} |
-
-
- | Throughput |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} |
-
-
- | Avg Read Time |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_reads_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} |
-
-
- | Avg Write Time |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_write_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_writes_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} |
-
-{{ end }}
-
- | Filesystem Fullness |
-
-{{ define "roughlyNearZero" }}
-{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
-{{ end }}
-{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
-
- | {{ .Labels.mountpoint }} |
- {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} |
-
-{{ end }}
-
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
- Node Disk - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}
-
- Disk I/O Utilization
-
-
- Filesystem Usage
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/node-overview.html b/consoles/node-overview.html
deleted file mode 100644
index 4ae8984b99..0000000000
--- a/consoles/node-overview.html
+++ /dev/null
@@ -1,121 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
- | Overview |
-
- | User CPU |
- {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='user'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} |
-
-
- | System CPU |
- {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='system'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} |
-
-
- | Memory Total |
- {{ template "prom_query_drilldown" (args (printf "node_memory_MemTotal_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} |
-
-
- | Memory Free |
- {{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} |
-
-
- | Network |
-
-{{ range printf "node_network_receive_bytes_total{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }}
-
- | {{ .Labels.device }} Received |
- {{ template "prom_query_drilldown" (args (printf "irate(node_network_receive_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} |
-
-
- | {{ .Labels.device }} Transmitted |
- {{ template "prom_query_drilldown" (args (printf "irate(node_network_transmit_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} |
-
-{{ end }}
-
- | Disks |
-
-{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s',device!~'^(md\\\\d+$|dm-)'}" .Params.instance | query | sortByLabel "device" }}
-
- | {{ .Labels.device }} Utilization |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} |
-
-{{ end }}
-{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
-
- | {{ .Labels.device }} Throughput |
- {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} |
-
-{{ end }}
-
- | Filesystem Fullness |
-
-{{ define "roughlyNearZero" }}
-{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
-{{ end }}
-{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
-
- | {{ .Labels.mountpoint }} |
- {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} |
-
-{{ end }}
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
- Node Overview - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}
-
- CPU Usage
-
-
-
- Disk I/O Utilization
-
-
-
- Memory
-
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/node.html b/consoles/node.html
deleted file mode 100644
index c1dfc1a891..0000000000
--- a/consoles/node.html
+++ /dev/null
@@ -1,35 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-
- | Node |
- {{ template "prom_query_drilldown" (args "sum(up{job='node'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='node'})") }} |
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
-Node
-
-
-
- | Node |
- Up |
- CPU Used |
- Memory Available |
-
-{{ range query "up{job='node'}" | sortByLabel "instance" }}
-
- | {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Labels.instance }} |
- Yes{{ else }} class="alert-danger">No{{ end }} |
- {{ template "prom_query_drilldown" (args (printf "100 * (1 - avg by(instance) (sum without(mode) (irate(node_cpu_seconds_total{job='node',mode=~'idle|iowait|steal',instance='%s'}[5m]))))" .Labels.instance) "%" "printf.1f") }} |
- {{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'} + node_memory_Cached_bytes{job='node',instance='%s'} + node_memory_Buffers_bytes{job='node',instance='%s'}" .Labels.instance .Labels.instance .Labels.instance) "B" "humanize1024") }} |
-
-{{ else }}
-| No nodes found. |
-{{ end }}
-
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/prometheus-overview.html b/consoles/prometheus-overview.html
deleted file mode 100644
index 08e027de06..0000000000
--- a/consoles/prometheus-overview.html
+++ /dev/null
@@ -1,96 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-
- | Overview |
-
-
- | CPU |
- {{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }} |
-
-
- | Memory |
- {{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }} |
-
-
- | Version |
- {{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}} |
-
-
-
- | Storage |
-
-
- | Ingested Samples |
- {{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }} |
-
-
- | Head Series |
- {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_head_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} |
-
-
- | Blocks Loaded |
- {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_blocks_loaded{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} |
-
-
- | Rules |
-
-
- | Evaluation Duration |
- {{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }} |
-
-
- | Notification Latency |
- {{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }} |
-
-
- | Notification Queue |
- {{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} |
-
-
- | HTTP Server |
-
-{{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }}
-
- | {{ .Labels.handler }} |
- {{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }} |
-
-{{ end }}
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
-
-
-Prometheus Overview - {{ .Params.instance }}
-
-
-Ingested Samples
-
-
-
-
-HTTP Server
-
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/consoles/prometheus.html b/consoles/prometheus.html
deleted file mode 100644
index e0d026376d..0000000000
--- a/consoles/prometheus.html
+++ /dev/null
@@ -1,34 +0,0 @@
-{{ template "head" . }}
-
-{{ template "prom_right_table_head" }}
-
- | Prometheus |
- {{ template "prom_query_drilldown" (args "sum(up{job='prometheus'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='prometheus'})") }} |
-
-{{ template "prom_right_table_tail" }}
-
-{{ template "prom_content_head" . }}
-Prometheus
-
-
-
- | Prometheus |
- Up |
- Ingested Samples |
- Memory |
-
-{{ range query "up{job='prometheus'}" | sortByLabel "instance" }}
-
- | {{ .Labels.instance }} |
- Yes{{ else }} class="alert-danger">No{{ end }} |
- {{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }} |
- {{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024")}} |
-
-{{ else }}
-| No devices found. |
-{{ end }}
-
-
-{{ template "prom_content_tail" . }}
-
-{{ template "tail" }}
diff --git a/discovery/README.md b/discovery/README.md
index 4c06608625..d5418e7fb1 100644
--- a/discovery/README.md
+++ b/discovery/README.md
@@ -233,7 +233,7 @@ type Config interface {
}
type DiscovererOptions struct {
- Logger log.Logger
+ Logger *slog.Logger
// A registerer for the Discoverer's metrics.
Registerer prometheus.Registerer
diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go
index a44912481a..7e35a1807f 100644
--- a/discovery/aws/ec2.go
+++ b/discovery/aws/ec2.go
@@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
@@ -29,11 +30,11 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
+ "github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@@ -100,7 +101,7 @@ type EC2SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*EC2SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*EC2SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &ec2Metrics{
refreshMetrics: rmi,
}
@@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type EC2Discovery struct {
*refresh.Discovery
- logger log.Logger
+ logger *slog.Logger
cfg *EC2SDConfig
- ec2 *ec2.EC2
+ ec2 ec2iface.EC2API
// azToAZID maps this account's availability zones to their underlying AZ
// ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so
@@ -157,14 +158,14 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
-func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
+func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
m, ok := metrics.(*ec2Metrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
d := &EC2Discovery{
logger: logger,
@@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis
return d, nil
}
-func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) {
+func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) {
if d.ec2 != nil {
return d.ec2, nil
}
@@ -254,14 +255,14 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
// Prometheus requires a reload if AWS adds a new AZ to the region.
if d.azToAZID == nil {
if err := d.refreshAZIDs(ctx); err != nil {
- level.Debug(d.logger).Log(
- "msg", "Unable to describe availability zones",
+ d.logger.Debug(
+ "Unable to describe availability zones",
"err", err)
}
}
input := &ec2.DescribeInstancesInput{Filters: filters}
- if err := ec2Client.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
+ if err := ec2Client.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, _ bool) bool {
for _, r := range p.Reservations {
for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil {
@@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone]
if !ok && d.azToAZID != nil {
- level.Debug(d.logger).Log(
- "msg", "Availability zone ID not found",
+ d.logger.Debug(
+ "Availability zone ID not found",
"az", *inst.Placement.AvailabilityZone)
}
labels[ec2LabelAZID] = model.LabelValue(azID)
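
Changing the ec2 field from a concrete *ec2.EC2 client to the ec2iface.EC2API interface is what enables the new test file below: any value satisfying the interface can be injected in place of a real AWS client. A minimal sketch of such a fake follows, assuming only the paging call used by refresh needs stubbing; the mock the tests actually use, newMockEC2Client, is more complete.

package aws

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/service/ec2"
	"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
)

// fakeEC2 embeds ec2iface.EC2API so only the methods a test cares about need
// implementing; calling anything else panics on the nil embedded interface.
type fakeEC2 struct {
	ec2iface.EC2API
	instances []*ec2.Instance
}

// DescribeInstancesPagesWithContext hands all stored instances to the paging
// callback as a single page.
func (f *fakeEC2) DescribeInstancesPagesWithContext(_ aws.Context, _ *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, _ ...request.Option) error {
	fn(&ec2.DescribeInstancesOutput{
		Reservations: []*ec2.Reservation{{Instances: f.instances}},
	}, true)
	return nil
}

A test could then construct the discoverer directly, e.g. d := &EC2Discovery{ec2: &fakeEC2{...}}, which is essentially what the tests below do with their own mock.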
diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go
new file mode 100644
index 0000000000..2955e0e02e
--- /dev/null
+++ b/discovery/aws/ec2_test.go
@@ -0,0 +1,434 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package aws
+
+import (
+ "context"
+ "errors"
+ "testing"
+
+ "github.com/aws/aws-sdk-go/aws"
+ "github.com/aws/aws-sdk-go/aws/request"
+ "github.com/aws/aws-sdk-go/service/ec2"
+ "github.com/aws/aws-sdk-go/service/ec2/ec2iface"
+ "github.com/prometheus/common/model"
+ "github.com/stretchr/testify/require"
+ "go.uber.org/goleak"
+
+ "github.com/prometheus/prometheus/discovery/targetgroup"
+)
+
+// Helper functions to get pointers to literals.
+// NOTE: these helpers are shared by a few tests. In the future it might be worth moving them out into a separate package.
+func strptr(str string) *string {
+ return &str
+}
+
+func boolptr(b bool) *bool {
+ return &b
+}
+
+func int64ptr(i int64) *int64 {
+ return &i
+}
+
+// Struct for test data.
+type ec2DataStore struct {
+ region string
+
+ azToAZID map[string]string
+
+ ownerID string
+
+ instances []*ec2.Instance
+}
+
+// The tests themselves.
+func TestMain(m *testing.M) {
+ goleak.VerifyTestMain(m)
+}
+
+func TestEC2DiscoveryRefreshAZIDs(t *testing.T) {
+ ctx := context.Background()
+
+ // iterate through the test cases
+ for _, tt := range []struct {
+ name string
+ shouldFail bool
+ ec2Data *ec2DataStore
+ }{
+ {
+ name: "Normal",
+ shouldFail: false,
+ ec2Data: &ec2DataStore{
+ azToAZID: map[string]string{
+ "azname-a": "azid-1",
+ "azname-b": "azid-2",
+ "azname-c": "azid-3",
+ },
+ },
+ },
+ {
+ name: "HandleError",
+ shouldFail: true,
+ ec2Data: &ec2DataStore{},
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ client := newMockEC2Client(tt.ec2Data)
+
+ d := &EC2Discovery{
+ ec2: client,
+ }
+
+ err := d.refreshAZIDs(ctx)
+ if tt.shouldFail {
+ require.Error(t, err)
+ } else {
+ require.NoError(t, err)
+ require.Equal(t, client.ec2Data.azToAZID, d.azToAZID)
+ }
+ })
+ }
+}
+
+func TestEC2DiscoveryRefresh(t *testing.T) {
+ ctx := context.Background()
+
+ // iterate through the test cases
+ for _, tt := range []struct {
+ name string
+ ec2Data *ec2DataStore
+ expected []*targetgroup.Group
+ }{
+ {
+ name: "NoPrivateIp",
+ ec2Data: &ec2DataStore{
+ region: "region-noprivateip",
+ azToAZID: map[string]string{
+ "azname-a": "azid-1",
+ "azname-b": "azid-2",
+ "azname-c": "azid-3",
+ },
+ instances: []*ec2.Instance{
+ {
+ InstanceId: strptr("instance-id-noprivateip"),
+ },
+ },
+ },
+ expected: []*targetgroup.Group{
+ {
+ Source: "region-noprivateip",
+ },
+ },
+ },
+ {
+ name: "NoVpc",
+ ec2Data: &ec2DataStore{
+ region: "region-novpc",
+ azToAZID: map[string]string{
+ "azname-a": "azid-1",
+ "azname-b": "azid-2",
+ "azname-c": "azid-3",
+ },
+ ownerID: "owner-id-novpc",
+ instances: []*ec2.Instance{
+ {
+						// set every possible option and test them here
+ Architecture: strptr("architecture-novpc"),
+ ImageId: strptr("ami-novpc"),
+ InstanceId: strptr("instance-id-novpc"),
+ InstanceLifecycle: strptr("instance-lifecycle-novpc"),
+ InstanceType: strptr("instance-type-novpc"),
+ Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
+ Platform: strptr("platform-novpc"),
+ PrivateDnsName: strptr("private-dns-novpc"),
+ PrivateIpAddress: strptr("1.2.3.4"),
+ PublicDnsName: strptr("public-dns-novpc"),
+ PublicIpAddress: strptr("42.42.42.2"),
+ State: &ec2.InstanceState{Name: strptr("running")},
+ // test tags once and for all
+ Tags: []*ec2.Tag{
+ {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")},
+ {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")},
+ nil,
+ {Value: strptr("tag-4-value")},
+ {Key: strptr("tag-5-key")},
+ },
+ },
+ },
+ },
+ expected: []*targetgroup.Group{
+ {
+ Source: "region-novpc",
+ Targets: []model.LabelSet{
+ {
+ "__address__": model.LabelValue("1.2.3.4:4242"),
+ "__meta_ec2_ami": model.LabelValue("ami-novpc"),
+ "__meta_ec2_architecture": model.LabelValue("architecture-novpc"),
+ "__meta_ec2_availability_zone": model.LabelValue("azname-b"),
+ "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
+ "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"),
+ "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"),
+ "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"),
+ "__meta_ec2_instance_state": model.LabelValue("running"),
+ "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"),
+ "__meta_ec2_platform": model.LabelValue("platform-novpc"),
+ "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"),
+ "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"),
+ "__meta_ec2_public_dns_name": model.LabelValue("public-dns-novpc"),
+ "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"),
+ "__meta_ec2_region": model.LabelValue("region-novpc"),
+ "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"),
+ "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"),
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "Ipv4",
+ ec2Data: &ec2DataStore{
+ region: "region-ipv4",
+ azToAZID: map[string]string{
+ "azname-a": "azid-1",
+ "azname-b": "azid-2",
+ "azname-c": "azid-3",
+ },
+ instances: []*ec2.Instance{
+ {
+					// just the minimum needed for the refresh to work
+ ImageId: strptr("ami-ipv4"),
+ InstanceId: strptr("instance-id-ipv4"),
+ InstanceType: strptr("instance-type-ipv4"),
+ Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")},
+ PrivateIpAddress: strptr("5.6.7.8"),
+ State: &ec2.InstanceState{Name: strptr("running")},
+ SubnetId: strptr("azid-3"),
+ VpcId: strptr("vpc-ipv4"),
+ // network interfaces
+ NetworkInterfaces: []*ec2.InstanceNetworkInterface{
+ // interface without subnet -> should be ignored
+ {
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{
+ {
+ Ipv6Address: strptr("2001:db8:1::1"),
+ IsPrimaryIpv6: boolptr(true),
+ },
+ },
+ },
+ // interface with subnet, no IPv6
+ {
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{},
+ SubnetId: strptr("azid-3"),
+ },
+ // interface with another subnet, no IPv6
+ {
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{},
+ SubnetId: strptr("azid-1"),
+ },
+ },
+ },
+ },
+ },
+ expected: []*targetgroup.Group{
+ {
+ Source: "region-ipv4",
+ Targets: []model.LabelSet{
+ {
+ "__address__": model.LabelValue("5.6.7.8:4242"),
+ "__meta_ec2_ami": model.LabelValue("ami-ipv4"),
+ "__meta_ec2_availability_zone": model.LabelValue("azname-c"),
+ "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"),
+ "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"),
+ "__meta_ec2_instance_state": model.LabelValue("running"),
+ "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"),
+ "__meta_ec2_owner_id": model.LabelValue(""),
+ "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"),
+ "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"),
+ "__meta_ec2_region": model.LabelValue("region-ipv4"),
+ "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"),
+ "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"),
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "Ipv6",
+ ec2Data: &ec2DataStore{
+ region: "region-ipv6",
+ azToAZID: map[string]string{
+ "azname-a": "azid-1",
+ "azname-b": "azid-2",
+ "azname-c": "azid-3",
+ },
+ instances: []*ec2.Instance{
+ {
+					// just the minimum needed for the refresh to work
+ ImageId: strptr("ami-ipv6"),
+ InstanceId: strptr("instance-id-ipv6"),
+ InstanceType: strptr("instance-type-ipv6"),
+ Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
+ PrivateIpAddress: strptr("9.10.11.12"),
+ State: &ec2.InstanceState{Name: strptr("running")},
+ SubnetId: strptr("azid-2"),
+ VpcId: strptr("vpc-ipv6"),
+ // network interfaces
+ NetworkInterfaces: []*ec2.InstanceNetworkInterface{
+ // interface without primary IPv6, index 2
+ {
+ Attachment: &ec2.InstanceNetworkInterfaceAttachment{
+ DeviceIndex: int64ptr(3),
+ },
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{
+ {
+ Ipv6Address: strptr("2001:db8:2::1:1"),
+ IsPrimaryIpv6: boolptr(false),
+ },
+ },
+ SubnetId: strptr("azid-2"),
+ },
+ // interface with primary IPv6, index 1
+ {
+ Attachment: &ec2.InstanceNetworkInterfaceAttachment{
+ DeviceIndex: int64ptr(1),
+ },
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{
+ {
+ Ipv6Address: strptr("2001:db8:2::2:1"),
+ IsPrimaryIpv6: boolptr(false),
+ },
+ {
+ Ipv6Address: strptr("2001:db8:2::2:2"),
+ IsPrimaryIpv6: boolptr(true),
+ },
+ },
+ SubnetId: strptr("azid-2"),
+ },
+ // interface with primary IPv6, index 3
+ {
+ Attachment: &ec2.InstanceNetworkInterfaceAttachment{
+ DeviceIndex: int64ptr(3),
+ },
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{
+ {
+ Ipv6Address: strptr("2001:db8:2::3:1"),
+ IsPrimaryIpv6: boolptr(true),
+ },
+ },
+ SubnetId: strptr("azid-1"),
+ },
+ // interface without primary IPv6, index 0
+ {
+ Attachment: &ec2.InstanceNetworkInterfaceAttachment{
+ DeviceIndex: int64ptr(0),
+ },
+ Ipv6Addresses: []*ec2.InstanceIpv6Address{},
+ SubnetId: strptr("azid-3"),
+ },
+ },
+ },
+ },
+ },
+ expected: []*targetgroup.Group{
+ {
+ Source: "region-ipv6",
+ Targets: []model.LabelSet{
+ {
+ "__address__": model.LabelValue("9.10.11.12:4242"),
+ "__meta_ec2_ami": model.LabelValue("ami-ipv6"),
+ "__meta_ec2_availability_zone": model.LabelValue("azname-b"),
+ "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
+ "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"),
+ "__meta_ec2_instance_state": model.LabelValue("running"),
+ "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"),
+ "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"),
+ "__meta_ec2_owner_id": model.LabelValue(""),
+ "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"),
+ "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"),
+ "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"),
+ "__meta_ec2_region": model.LabelValue("region-ipv6"),
+ "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"),
+ "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"),
+ },
+ },
+ },
+ },
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ client := newMockEC2Client(tt.ec2Data)
+
+ d := &EC2Discovery{
+ ec2: client,
+ cfg: &EC2SDConfig{
+ Port: 4242,
+ Region: client.ec2Data.region,
+ },
+ }
+
+ g, err := d.refresh(ctx)
+ require.NoError(t, err)
+ require.Equal(t, tt.expected, g)
+ })
+ }
+}
+
+// EC2 client mock.
+type mockEC2Client struct {
+ ec2iface.EC2API
+ ec2Data ec2DataStore
+}
+
+func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client {
+ client := mockEC2Client{
+ ec2Data: *ec2Data,
+ }
+ return &client
+}
+
+func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(_ aws.Context, _ *ec2.DescribeAvailabilityZonesInput, _ ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) {
+ if len(m.ec2Data.azToAZID) == 0 {
+ return nil, errors.New("No AZs found")
+ }
+
+ azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID))
+
+ i := 0
+ for k, v := range m.ec2Data.azToAZID {
+ azs[i] = &ec2.AvailabilityZone{
+ ZoneName: strptr(k),
+ ZoneId: strptr(v),
+ }
+ i++
+ }
+
+ return &ec2.DescribeAvailabilityZonesOutput{
+ AvailabilityZones: azs,
+ }, nil
+}
+
+func (m *mockEC2Client) DescribeInstancesPagesWithContext(_ aws.Context, _ *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, _ ...request.Option) error {
+ r := ec2.Reservation{}
+ r.SetInstances(m.ec2Data.instances)
+ r.SetOwnerId(m.ec2Data.ownerID)
+
+ o := ec2.DescribeInstancesOutput{}
+ o.SetReservations([]*ec2.Reservation{&r})
+
+ _ = fn(&o, true)
+
+ return nil
+}
diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go
index 0ad7f2d541..ff1059ede0 100644
--- a/discovery/aws/lightsail.go
+++ b/discovery/aws/lightsail.go
@@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
@@ -29,10 +30,10 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@@ -82,7 +83,7 @@ type LightsailSDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*LightsailSDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*LightsailSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &lightsailMetrics{
refreshMetrics: rmi,
}
@@ -114,6 +115,7 @@ func (c *LightsailSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
region, err := metadata.Region()
if err != nil {
+ //nolint:staticcheck // Capitalized first word.
return errors.New("Lightsail SD configuration requires a region")
}
c.Region = region
@@ -130,14 +132,14 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
-func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
+func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
m, ok := metrics.(*lightsailMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
d := &LightsailDiscovery{
diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go
index 70d95b9f3a..670afb5a4e 100644
--- a/discovery/azure/azure.go
+++ b/discovery/azure/azure.go
@@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"math/rand"
"net"
"net/http"
@@ -35,12 +36,10 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
-
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
@@ -70,18 +69,14 @@ const (
authMethodManagedIdentity = "ManagedIdentity"
)
-var (
- userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
-
- // DefaultSDConfig is the default Azure SD configuration.
- DefaultSDConfig = SDConfig{
- Port: 80,
- RefreshInterval: model.Duration(5 * time.Minute),
- Environment: "AzurePublicCloud",
- AuthenticationMethod: authMethodOAuth,
- HTTPClientConfig: config_util.DefaultHTTPClientConfig,
- }
-)
+// DefaultSDConfig is the default Azure SD configuration.
+var DefaultSDConfig = SDConfig{
+ Port: 80,
+ RefreshInterval: model.Duration(5 * time.Minute),
+ Environment: "AzurePublicCloud",
+ AuthenticationMethod: authMethodOAuth,
+ HTTPClientConfig: config_util.DefaultHTTPClientConfig,
+}
var environments = map[string]cloud.Configuration{
"AZURECHINACLOUD": cloud.AzureChina,
@@ -175,7 +170,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Discovery struct {
*refresh.Discovery
- logger log.Logger
+ logger *slog.Logger
cfg *SDConfig
port int
cache *cache.Cache[string, *armnetwork.Interface]
@@ -183,14 +178,14 @@ type Discovery struct {
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
-func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*azureMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
@@ -228,26 +223,26 @@ type azureClient struct {
vm *armcompute.VirtualMachinesClient
vmss *armcompute.VirtualMachineScaleSetsClient
vmssvm *armcompute.VirtualMachineScaleSetVMsClient
- logger log.Logger
+ logger *slog.Logger
}
var _ client = &azureClient{}
-// createAzureClient is a helper function for creating an Azure compute client to ARM.
-func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
- cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment)
+// createAzureClient is a helper method for creating an Azure compute client to ARM.
+func (d *Discovery) createAzureClient() (client, error) {
+ cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment)
if err != nil {
return &azureClient{}, err
}
var c azureClient
- c.logger = logger
+ c.logger = d.logger
telemetry := policy.TelemetryOptions{
- ApplicationID: userAgent,
+ ApplicationID: version.PrometheusUserAgent(),
}
- credential, err := newCredential(cfg, policy.ClientOptions{
+ credential, err := newCredential(*d.cfg, policy.ClientOptions{
Cloud: cloudConfiguration,
Telemetry: telemetry,
})
@@ -255,7 +250,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
return &azureClient{}, err
}
- client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd")
+ client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd")
if err != nil {
return &azureClient{}, err
}
@@ -267,22 +262,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
},
}
- c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options)
+ c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
- c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options)
+ c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
- c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options)
+ c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
- c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options)
+ c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
@@ -337,35 +332,27 @@ type virtualMachine struct {
}
// Create a new azureResource object from an ID string.
-func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) {
+func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) {
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
resourceID, err := arm.ParseResourceID(id)
if err != nil {
err := fmt.Errorf("invalid ID '%s': %w", id, err)
- level.Error(logger).Log("err", err)
+ logger.Error("Failed to parse resource ID", "err", err)
return &arm.ResourceID{}, err
}
return resourceID, nil
}
-func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
- defer level.Debug(d.logger).Log("msg", "Azure discovery completed")
-
- client, err := createAzureClient(*d.cfg, d.logger)
- if err != nil {
- d.metrics.failuresCount.Inc()
- return nil, fmt.Errorf("could not create Azure client: %w", err)
- }
-
+func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) {
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
- level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines))
+ d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines))
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
@@ -418,6 +405,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return []*targetgroup.Group{&tg}, nil
}
+func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+ defer d.logger.Debug("Azure discovery completed")
+
+ client, err := d.createAzureClient()
+ if err != nil {
+ d.metrics.failuresCount.Inc()
+ return nil, fmt.Errorf("could not create Azure client: %w", err)
+ }
+
+ return d.refreshAzureClient(ctx, client)
+}
+
func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) {
r, err := newAzureResourceFromID(vm.ID, d.logger)
if err != nil {
@@ -458,11 +457,10 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
}
if err != nil {
- if errors.Is(err, errorNotFound) {
- level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
- } else {
+ if !errors.Is(err, errorNotFound) {
return nil, err
}
+ d.logger.Warn("Network interface does not exist", "name", nicID, "err", err)
// Get out of this routine because we cannot continue without a network interface.
return nil, nil
}
@@ -480,7 +478,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
// yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Properties.Primary == nil {
- level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
+ d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name)
return nil, nil
}
@@ -724,7 +722,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
rs := time.Duration(random) * time.Second
exptime := time.Duration(d.cfg.RefreshInterval*10) + rs
d.cache.Set(nicID, netInt, cache.WithExpiration(exptime))
- level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds())
+ d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds())
}
// getFromCache will get the network Interface for the specified nicID
diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go
index 32dab66c8c..d7141561e2 100644
--- a/discovery/azure/azure_test.go
+++ b/discovery/azure/azure_test.go
@@ -15,19 +15,34 @@ package azure
import (
"context"
- "fmt"
+ "log/slog"
+ "net/http"
+ "slices"
+ "strings"
"testing"
+ "github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
+ azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake"
+ "github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
+ fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
+ fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
- "github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
+
+ "github.com/prometheus/prometheus/discovery"
+ "github.com/prometheus/prometheus/discovery/targetgroup"
)
+const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}"
+
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m,
goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"),
@@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) {
vmType := "type"
location := "westeurope"
computerName := "computer_name"
- networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1"
ipAddress := "10.20.30.40"
primary := true
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
{
- ID: &networkID,
+ ID: to.Ptr(defaultMockNetworkID),
Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary},
},
},
@@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) {
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
- NetworkInterfaces: []string{networkID},
+ NetworkInterfaces: []string{defaultMockNetworkID},
Size: size,
}
@@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) {
cfg := DefaultSDConfig
d := &Discovery{
cfg: &cfg,
- logger: log.NewNopLogger(),
+ logger: promslog.NewNopLogger(),
cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))),
}
network := armnetwork.Interface{
- Name: &networkID,
+ Name: to.Ptr(defaultMockNetworkID),
+ ID: to.Ptr(defaultMockNetworkID),
Properties: &armnetwork.InterfacePropertiesFormat{
Primary: &primary,
IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
@@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) {
},
},
}
- client := &mockAzureClient{
- networkInterface: &network,
- }
+
+ client := createMockAzureClient(t, nil, nil, nil, network, nil)
+
labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM)
require.NoError(t, err)
require.Len(t, labelSet, 11)
@@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) {
}
}
+func TestAzureRefresh(t *testing.T) {
+ tests := []struct {
+ scenario string
+ vmResp []armcompute.VirtualMachinesClientListAllResponse
+ vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse
+ vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse
+ interfacesResp armnetwork.Interface
+ expectedTG []*targetgroup.Group
+ }{
+ {
+ scenario: "VMs, VMSS and VMSSVMs in Multiple Responses",
+ vmResp: []armcompute.VirtualMachinesClientListAllResponse{
+ {
+ VirtualMachineListResult: armcompute.VirtualMachineListResult{
+ Value: []*armcompute.VirtualMachine{
+ defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")),
+ defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")),
+ },
+ },
+ },
+ {
+ VirtualMachineListResult: armcompute.VirtualMachineListResult{
+ Value: []*armcompute.VirtualMachine{
+ defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")),
+ defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")),
+ },
+ },
+ },
+ },
+ vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{
+ {
+ VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{
+ Value: []*armcompute.VirtualMachineScaleSet{
+ {
+ ID: to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"),
+ Name: to.Ptr("vmScaleSet1"),
+ Location: to.Ptr("australiaeast"),
+ Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"),
+ },
+ },
+ },
+ },
+ },
+ vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{
+ {
+ VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{
+ Value: []*armcompute.VirtualMachineScaleSetVM{
+ defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")),
+ defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")),
+ },
+ },
+ },
+ },
+ interfacesResp: armnetwork.Interface{
+ ID: to.Ptr(defaultMockNetworkID),
+ Properties: &armnetwork.InterfacePropertiesFormat{
+ Primary: to.Ptr(true),
+ IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
+ {Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{
+ PrivateIPAddress: to.Ptr("10.0.0.1"),
+ }},
+ },
+ },
+ },
+ expectedTG: []*targetgroup.Group{
+ {
+ Targets: []model.LabelSet{
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vm1",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vm2",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vm3",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vm4",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vmScaleSet1_vm1",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_scale_set": "vmScaleSet1",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ {
+ "__address__": "10.0.0.1:80",
+ "__meta_azure_machine_computer_name": "computer_name",
+ "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2",
+ "__meta_azure_machine_location": "australiaeast",
+ "__meta_azure_machine_name": "vmScaleSet1_vm2",
+ "__meta_azure_machine_os_type": "Linux",
+ "__meta_azure_machine_private_ip": "10.0.0.1",
+ "__meta_azure_machine_resource_group": "{resourceGroup}",
+ "__meta_azure_machine_scale_set": "vmScaleSet1",
+ "__meta_azure_machine_size": "size",
+ "__meta_azure_machine_tag_prometheus": "",
+ "__meta_azure_subscription_id": "",
+ "__meta_azure_tenant_id": "",
+ },
+ },
+ },
+ },
+ },
+ }
+ for _, tc := range tests {
+ t.Run(tc.scenario, func(t *testing.T) {
+ t.Parallel()
+ azureSDConfig := &DefaultSDConfig
+
+ azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil)
+
+ reg := prometheus.NewRegistry()
+ refreshMetrics := discovery.NewRefreshMetrics(reg)
+ metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics)
+
+ sd, err := NewDiscovery(azureSDConfig, nil, metrics)
+ require.NoError(t, err)
+
+ tg, err := sd.refreshAzureClient(context.Background(), azureClient)
+ require.NoError(t, err)
+
+ sortTargetsByID(tg[0].Targets)
+ require.Equal(t, tc.expectedTG, tg)
+ })
+ }
+}
+
type mockAzureClient struct {
- networkInterface *armnetwork.Interface
+ azureClient
}
-var _ client = &mockAzureClient{}
+func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client {
+ t.Helper()
+ mockVMServer := defaultMockVMServer(vmResp)
+ mockVMSSServer := defaultMockVMSSServer(vmssResp)
+ mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp)
+ mockInterfaceServer := defaultMockInterfaceServer(interfaceResp)
-func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) {
- return nil, nil
-}
+ vmClient, err := armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
+ ClientOptions: azcore.ClientOptions{
+ Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer),
+ },
+ })
+ require.NoError(t, err)
-func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) {
- return nil, nil
-}
+ vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
+ ClientOptions: azcore.ClientOptions{
+ Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer),
+ },
+ })
+ require.NoError(t, err)
-func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) {
- return nil, nil
-}
+ vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
+ ClientOptions: azcore.ClientOptions{
+ Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer),
+ },
+ })
+ require.NoError(t, err)
-func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
- if networkInterfaceID == "" {
- return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty")
+ interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
+ ClientOptions: azcore.ClientOptions{
+ Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer),
+ },
+ })
+ require.NoError(t, err)
+
+ return &mockAzureClient{
+ azureClient: azureClient{
+ vm: vmClient,
+ vmss: vmssClient,
+ vmssvm: vmssvmClient,
+ nic: interfacesClient,
+ logger: logger,
+ },
}
- return m.networkInterface, nil
}
-func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
- if scaleSetName == "" {
- return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty")
+func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer {
+ return fakenetwork.InterfacesServer{
+ Get: func(_ context.Context, _, _ string, _ *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) {
+ resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil)
+ return
+ },
+ GetVirtualMachineScaleSetNetworkInterface: func(_ context.Context, _, _, _, _ string, _ *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) {
+ resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil)
+ return
+ },
}
- return m.networkInterface, nil
+}
+
+func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer {
+ return fake.VirtualMachinesServer{
+ NewListAllPager: func(_ *armcompute.VirtualMachinesClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]) {
+ for _, page := range vmResp {
+ resp.AddPage(http.StatusOK, page, nil)
+ }
+ return
+ },
+ }
+}
+
+func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer {
+ return fake.VirtualMachineScaleSetsServer{
+ NewListAllPager: func(_ *armcompute.VirtualMachineScaleSetsClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]) {
+ for _, page := range vmssResp {
+ resp.AddPage(http.StatusOK, page, nil)
+ }
+ return
+ },
+ }
+}
+
+func defaultMockVMSSVMServer(vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer {
+ return fake.VirtualMachineScaleSetVMsServer{
+ NewListPager: func(_, _ string, _ *armcompute.VirtualMachineScaleSetVMsClientListOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]) {
+ for _, page := range vmssvmResp {
+ resp.AddPage(http.StatusOK, page, nil)
+ }
+ return
+ },
+ }
+}
+
+func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine {
+ vmSize := armcompute.VirtualMachineSizeTypes("size")
+ osType := armcompute.OperatingSystemTypesLinux
+ defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM"
+ defaultName := "testVM"
+
+ if id == nil {
+ id = &defaultID
+ }
+ if name == nil {
+ name = &defaultName
+ }
+
+ return &armcompute.VirtualMachine{
+ ID: id,
+ Name: name,
+ Type: to.Ptr("Microsoft.Compute/virtualMachines"),
+ Location: to.Ptr("australiaeast"),
+ Properties: &armcompute.VirtualMachineProperties{
+ OSProfile: &armcompute.OSProfile{
+ ComputerName: to.Ptr("computer_name"),
+ },
+ StorageProfile: &armcompute.StorageProfile{
+ OSDisk: &armcompute.OSDisk{
+ OSType: &osType,
+ },
+ },
+ NetworkProfile: &armcompute.NetworkProfile{
+ NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
+ {
+ ID: to.Ptr(defaultMockNetworkID),
+ },
+ },
+ },
+ HardwareProfile: &armcompute.HardwareProfile{
+ VMSize: &vmSize,
+ },
+ },
+ Tags: map[string]*string{
+ "prometheus": new(string),
+ },
+ }
+}
+
+func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM {
+ vmSize := armcompute.VirtualMachineSizeTypes("size")
+ osType := armcompute.OperatingSystemTypesLinux
+ defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM"
+ defaultName := "testVM"
+
+ if id == nil {
+ id = &defaultID
+ }
+ if name == nil {
+ name = &defaultName
+ }
+
+ return &armcompute.VirtualMachineScaleSetVM{
+ ID: id,
+ Name: name,
+ Type: to.Ptr("Microsoft.Compute/virtualMachines"),
+ InstanceID: to.Ptr("123"),
+ Location: to.Ptr("australiaeast"),
+ Properties: &armcompute.VirtualMachineScaleSetVMProperties{
+ OSProfile: &armcompute.OSProfile{
+ ComputerName: to.Ptr("computer_name"),
+ },
+ StorageProfile: &armcompute.StorageProfile{
+ OSDisk: &armcompute.OSDisk{
+ OSType: &osType,
+ },
+ },
+ NetworkProfile: &armcompute.NetworkProfile{
+ NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
+ {ID: to.Ptr(defaultMockNetworkID)},
+ },
+ },
+ HardwareProfile: &armcompute.HardwareProfile{
+ VMSize: &vmSize,
+ },
+ },
+ Tags: map[string]*string{
+ "prometheus": new(string),
+ },
+ }
+}
+
+func sortTargetsByID(targets []model.LabelSet) {
+ slices.SortFunc(targets, func(a, b model.LabelSet) int {
+ return strings.Compare(string(a["__meta_azure_machine_id"]), string(b["__meta_azure_machine_id"]))
+ })
}
diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go
index bdc1fc8dce..4c8de6e291 100644
--- a/discovery/consul/consul.go
+++ b/discovery/consul/consul.go
@@ -17,17 +17,18 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
+ "slices"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
consul "github.com/hashicorp/consul/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -113,8 +114,11 @@ type SDConfig struct {
Services []string `yaml:"services,omitempty"`
// A list of tags used to filter instances inside a service. Services must contain all tags in the list.
ServiceTags []string `yaml:"tags,omitempty"`
- // Desired node metadata.
+ // Desired node metadata. As of Consul 1.14, consider `filter` instead.
NodeMeta map[string]string `yaml:"node_meta,omitempty"`
+ // Consul filter string
+	// See https://www.consul.io/api-docs/catalog#filtering-1 for syntax.
+ Filter string `yaml:"filter,omitempty"`
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
@@ -174,22 +178,23 @@ type Discovery struct {
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
+ watchedFilter string
allowStale bool
refreshInterval time.Duration
finalizer func()
- logger log.Logger
+ logger *slog.Logger
metrics *consulMetrics
}
// NewDiscovery returns a new Discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*consulMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout))
@@ -218,6 +223,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
watchedServices: conf.Services,
watchedTags: conf.ServiceTags,
watchedNodeMeta: conf.NodeMeta,
+ watchedFilter: conf.Filter,
allowStale: conf.AllowStale,
refreshInterval: time.Duration(conf.RefreshInterval),
clientDatacenter: conf.Datacenter,
@@ -236,22 +242,17 @@ func (d *Discovery) shouldWatch(name string, tags []string) bool {
return d.shouldWatchFromName(name) && d.shouldWatchFromTags(tags)
}
-// shouldWatch returns whether the service of the given name should be watched based on its name.
+// shouldWatchFromName returns whether the service of the given name should be watched based on its name.
func (d *Discovery) shouldWatchFromName(name string) bool {
// If there's no fixed set of watched services, we watch everything.
if len(d.watchedServices) == 0 {
return true
}
- for _, sn := range d.watchedServices {
- if sn == name {
- return true
- }
- }
- return false
+ return slices.Contains(d.watchedServices, name)
}
-// shouldWatch returns whether the service of the given name should be watched based on its tags.
+// shouldWatchFromTags returns whether the service of the given name should be watched based on its tags.
// This gets called when the user doesn't specify a list of services in order to avoid watching
// *all* services. Details in https://github.com/prometheus/prometheus/pull/3814
func (d *Discovery) shouldWatchFromTags(tags []string) bool {
@@ -282,7 +283,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
- level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
+ d.logger.Error("Error retrieving datacenter name", "err", err)
d.metrics.rpcFailuresCount.Inc()
return err
}
@@ -290,12 +291,12 @@ func (d *Discovery) getDatacenter() error {
dc, ok := info["Config"]["Datacenter"].(string)
if !ok {
err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
- level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
+ d.logger.Error("Error retrieving datacenter name", "err", err)
return err
}
d.clientDatacenter = dc
- d.logger = log.With(d.logger, "datacenter", dc)
+ d.logger = d.logger.With("datacenter", dc)
return nil
}
@@ -361,13 +362,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// entire list of services.
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) {
catalog := d.client.Catalog()
- level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ","))
+ d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter)
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
WaitTime: watchTimeout,
AllowStale: d.allowStale,
NodeMeta: d.watchedNodeMeta,
+ Filter: d.watchedFilter,
}
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
@@ -382,7 +384,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
}
if err != nil {
- level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
+ d.logger.Error("Error refreshing service list", "err", err)
d.metrics.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
@@ -445,7 +447,7 @@ type consulService struct {
discovery *Discovery
client *consul.Client
tagSeparator string
- logger log.Logger
+ logger *slog.Logger
rpcFailuresCount prometheus.Counter
serviceRPCDuration prometheus.Observer
}
@@ -490,7 +492,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
// Get updates for a service.
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) {
- level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
+ srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
@@ -513,7 +515,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
}
if err != nil {
- level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
+ srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
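The new `filter` option added above maps to SDConfig.Filter through its yaml tag and is passed straight through to Consul's catalog query (Consul 1.14+ filtering syntax, see the URL in the field comment). A small sketch of how a config carrying the option would unmarshal, assuming the pre-existing `server` and `services` keys keep their current meaning; the filter expression mirrors the one exercised by TestFilterOption below:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v2"

	"github.com/prometheus/prometheus/discovery/consul"
)

func main() {
	// `filter` is the new key; server and services are existing SDConfig options.
	input := `
server: localhost:8500
services: ['test']
filter: 'NodeMeta.rack_name == "2304"'
`
	var cfg consul.SDConfig
	if err := yaml.UnmarshalStrict([]byte(input), &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Filter) // NodeMeta.rack_name == "2304"
}
```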
diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go
index e3bc7938f5..ea896ce31b 100644
--- a/discovery/consul/consul_test.go
+++ b/discovery/consul/consul_test.go
@@ -21,10 +21,10 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"gopkg.in/yaml.v2"
@@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
case "/v1/catalog/services?index=1&wait=120000ms":
time.Sleep(5 * time.Second)
response = ServicesTestAnswer
+ case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms":
+ response = ServicesTestAnswer
default:
t.Errorf("Unhandled consul call: %s", r.URL)
}
@@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
}
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
- logger := log.NewNopLogger()
+ logger := promslog.NewNopLogger()
metrics := NewTestMetrics(t, config, prometheus.NewRegistry())
@@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) {
<-ch
}
+// Watch the test service with a specific tag and node-meta via the Filter parameter.
+func TestFilterOption(t *testing.T) {
+ stub, config := newServer(t)
+ defer stub.Close()
+
+ config.Services = []string{"test"}
+ config.Filter = `NodeMeta.rack_name == "2304"`
+ config.Token = "fake-token"
+
+ d := newDiscovery(t, config)
+
+ ctx, cancel := context.WithCancel(context.Background())
+ ch := make(chan []*targetgroup.Group)
+ go func() {
+ d.Run(ctx, ch)
+ close(ch)
+ }()
+ checkOneTarget(t, <-ch)
+ cancel()
+}
+
func TestGetDatacenterShouldReturnError(t *testing.T) {
for _, tc := range []struct {
handler func(http.ResponseWriter, *http.Request)
@@ -376,14 +399,14 @@ func TestGetDatacenterShouldReturnError(t *testing.T) {
}{
{
// Define a handler that will return status 500.
- handler: func(w http.ResponseWriter, r *http.Request) {
+ handler: func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
},
errMessage: "Unexpected response code: 500 ()",
},
{
// Define a handler that will return incorrect response.
- handler: func(w http.ResponseWriter, r *http.Request) {
+ handler: func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`{"Config": {"Not-Datacenter": "test-dc"}}`))
},
errMessage: "invalid value '' for Config.Datacenter",
@@ -402,14 +425,14 @@ func TestGetDatacenterShouldReturnError(t *testing.T) {
d := newDiscovery(t, config)
// Should be empty if not initialized.
- require.Equal(t, "", d.clientDatacenter)
+ require.Empty(t, d.clientDatacenter)
err = d.getDatacenter()
// An error should be returned.
- require.Equal(t, tc.errMessage, err.Error())
+ require.EqualError(t, err, tc.errMessage)
// Should still be empty.
- require.Equal(t, "", d.clientDatacenter)
+ require.Empty(t, d.clientDatacenter)
}
}
diff --git a/discovery/consul/metrics.go b/discovery/consul/metrics.go
index 8266e7cc60..b49509bd8f 100644
--- a/discovery/consul/metrics.go
+++ b/discovery/consul/metrics.go
@@ -31,7 +31,7 @@ type consulMetrics struct {
metricRegisterer discovery.MetricRegisterer
}
-func newDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
m := &consulMetrics{
rpcFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go
index ecee60cb1f..d0ececd9e9 100644
--- a/discovery/digitalocean/digitalocean.go
+++ b/discovery/digitalocean/digitalocean.go
@@ -15,7 +15,9 @@ package digitalocean
import (
"context"
+ "errors"
"fmt"
+ "log/slog"
"net"
"net/http"
"strconv"
@@ -23,7 +25,6 @@ import (
"time"
"github.com/digitalocean/godo"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -64,7 +65,7 @@ func init() {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &digitaloceanMetrics{
refreshMetrics: rmi,
}
@@ -111,10 +112,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*digitaloceanMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
@@ -131,7 +132,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
Transport: rt,
Timeout: time.Duration(conf.RefreshInterval),
},
- godo.SetUserAgent(fmt.Sprintf("Prometheus/%s", version.Version)),
+ godo.SetUserAgent(version.PrometheusUserAgent()),
)
if err != nil {
return nil, fmt.Errorf("error setting up digital ocean agent: %w", err)
diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go
index 841b5ef977..a282225ac2 100644
--- a/discovery/digitalocean/digitalocean_test.go
+++ b/discovery/digitalocean/digitalocean_test.go
@@ -19,9 +19,9 @@ import (
"net/url"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
diff --git a/discovery/discoverer_metrics_noop.go b/discovery/discoverer_metrics_noop.go
index 638317ace1..4321204b6c 100644
--- a/discovery/discoverer_metrics_noop.go
+++ b/discovery/discoverer_metrics_noop.go
@@ -13,7 +13,7 @@
package discovery
-// Create a dummy metrics struct, because this SD doesn't have any metrics.
+// NoopDiscovererMetrics creates a dummy metrics struct, because this SD doesn't have any metrics.
type NoopDiscovererMetrics struct{}
var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil)
diff --git a/discovery/discovery.go b/discovery/discovery.go
index a5826f8176..c400de3632 100644
--- a/discovery/discovery.go
+++ b/discovery/discovery.go
@@ -15,9 +15,9 @@ package discovery
import (
"context"
+ "log/slog"
"reflect"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -39,7 +39,7 @@ type Discoverer interface {
Run(ctx context.Context, up chan<- []*targetgroup.Group)
}
-// Internal metrics of service discovery mechanisms.
+// DiscovererMetrics are internal metrics of service discovery mechanisms.
type DiscovererMetrics interface {
Register() error
Unregister()
@@ -47,7 +47,7 @@ type DiscovererMetrics interface {
// DiscovererOptions provides options for a Discoverer.
type DiscovererOptions struct {
- Logger log.Logger
+ Logger *slog.Logger
Metrics DiscovererMetrics
@@ -56,7 +56,7 @@ type DiscovererOptions struct {
HTTPClientOptions []config.HTTPClientOption
}
-// Metrics used by the "refresh" package.
+// RefreshMetrics are used by the "refresh" package.
// We define them here in the "discovery" package in order to avoid a cyclic dependency between
// "discovery" and "refresh".
type RefreshMetrics struct {
@@ -64,17 +64,18 @@ type RefreshMetrics struct {
Duration prometheus.Observer
}
-// Instantiate the metrics used by the "refresh" package.
+// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package.
type RefreshMetricsInstantiator interface {
Instantiate(mech string) *RefreshMetrics
}
-// An interface for registering, unregistering, and instantiating metrics for the "refresh" package.
-// Refresh metrics are registered and unregistered outside of the service discovery mechanism.
-// This is so that the same metrics can be reused across different service discovery mechanisms.
-// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are
-// specific to that SD. However, doing so would also expose too many unused metrics on
-// the Prometheus /metrics endpoint.
+// RefreshMetricsManager is an interface for registering, unregistering, and
+// instantiating metrics for the "refresh" package. Refresh metrics are
+// registered and unregistered outside of the service discovery mechanism. This
+// is so that the same metrics can be reused across different service discovery
+// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to
+// use const labels which are specific to that SD. However, doing so would also
+// expose too many unused metrics on the Prometheus /metrics endpoint.
type RefreshMetricsManager interface {
DiscovererMetrics
RefreshMetricsInstantiator
@@ -108,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) {
// UnmarshalYAML implements yaml.Unmarshaler.
func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
- cfgTyp := getConfigType(configsType)
+ cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
@@ -123,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
// MarshalYAML implements yaml.Marshaler.
func (c Configs) MarshalYAML() (interface{}, error) {
- cfgTyp := getConfigType(configsType)
+ cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
@@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
return staticDiscoverer(c), nil
}
-// No metrics are needed for this service discovery mechanism.
+// NewDiscovererMetrics returns NoopDiscovererMetrics because no metrics are
+// needed for this service discovery mechanism.
func (c StaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
return &NoopDiscovererMetrics{}
}
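
// Illustrative sketch, not part of the diff: the changes in this file and the
// ones below migrate structured logging from github.com/go-kit/log to the
// standard library's log/slog, with promslog.NewNopLogger() as the fallback
// when callers pass a nil logger. Roughly, call sites move like this.
package main

import (
	"log/slog"
	"os"

	"github.com/prometheus/common/promslog"
)

func main() {
	// Nil-logger fallback, mirroring the checks added in the NewDiscovery
	// constructors: a nop *slog.Logger that discards everything.
	var logger *slog.Logger = promslog.NewNopLogger()
	logger.Error("Error refreshing DNS targets", "err", "example error") // discarded

	// A real handler writes key/value pairs; a plain text handler stands in
	// here for Prometheus' own promslog-configured logger.
	logger = slog.New(slog.NewTextHandler(os.Stderr, nil))
	logger.Warn("Invalid record", "record", "example")
}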
diff --git a/discovery/discovery_test.go b/discovery/discovery_test.go
new file mode 100644
index 0000000000..af327195f2
--- /dev/null
+++ b/discovery/discovery_test.go
@@ -0,0 +1,36 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package discovery
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gopkg.in/yaml.v2"
+)
+
+func TestConfigsCustomUnMarshalMarshal(t *testing.T) {
+ input := `static_configs:
+- targets:
+ - foo:1234
+ - bar:4321
+`
+ cfg := &Configs{}
+ err := yaml.UnmarshalStrict([]byte(input), cfg)
+ require.NoError(t, err)
+
+ output, err := yaml.Marshal(cfg)
+ require.NoError(t, err)
+ require.Equal(t, input, string(output))
+}
diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go
index 314c3d38cd..405dba44f7 100644
--- a/discovery/dns/dns.go
+++ b/discovery/dns/dns.go
@@ -17,17 +17,17 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
"sync"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@@ -111,21 +111,21 @@ type Discovery struct {
names []string
port int
qtype uint16
- logger log.Logger
+ logger *slog.Logger
metrics *dnsMetrics
- lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
+ lookupFn func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error)
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*dnsMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
qtype := dns.TypeSRV
@@ -174,7 +174,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
for _, name := range d.names {
go func(n string) {
if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) {
- level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
+ d.logger.Error("Error refreshing DNS targets", "err", err)
}
wg.Done()
}(name)
@@ -238,7 +238,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
// CNAME responses can occur with "Type: A" dns_sd_config requests.
continue
default:
- level.Warn(d.logger).Log("msg", "Invalid record", "record", record)
+ d.logger.Warn("Invalid record", "record", record)
continue
}
tg.Targets = append(tg.Targets, model.LabelSet{
@@ -288,7 +288,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
// error will be generic-looking, because trying to return all the errors
// returned by the combination of all name permutations and servers is a
// nightmare.
-func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+func lookupWithSearchPath(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
conf, err := dns.ClientConfigFromFile(resolvConf)
if err != nil {
return nil, fmt.Errorf("could not load resolv.conf: %w", err)
@@ -337,14 +337,14 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms
// A non-viable answer is "anything else", which encompasses both various
// system-level problems (like network timeouts) and also
// valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc).
-func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) {
+func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger *slog.Logger) (*dns.Msg, error) {
client := &dns.Client{}
for _, server := range conf.Servers {
servAddr := net.JoinHostPort(server, conf.Port)
msg, err := askServerForName(name, qtype, client, servAddr, true)
if err != nil {
- level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err)
+ logger.Warn("DNS resolution failed", "server", server, "name", name, "err", err)
continue
}
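
// Illustrative sketch, not part of the diff: with lookupFn now taking a
// *slog.Logger, DNS resolution is easy to stub; this hypothetical stand-in
// mirrors the stubs used in dns_test.go further down.
package main

import (
	"fmt"
	"log/slog"

	"github.com/miekg/dns"
	"github.com/prometheus/common/promslog"
)

type lookupFunc func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error)

func main() {
	// Canned SRV answer instead of a live DNS query.
	var stub lookupFunc = func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
		return &dns.Msg{Answer: []dns.RR{&dns.SRV{Port: 3306, Target: "db1.example.com."}}}, nil
	}
	msg, err := stub("_mysql._tcp.db.example.com.", dns.TypeSRV, promslog.NewNopLogger())
	fmt.Println(len(msg.Answer), err)
}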
diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go
index 33a976827d..ea46ad3237 100644
--- a/discovery/dns/dns_test.go
+++ b/discovery/dns/dns_test.go
@@ -15,12 +15,12 @@ package dns
import (
"context"
- "fmt"
+ "errors"
+ "log/slog"
"net"
"testing"
"time"
- "github.com/go-kit/log"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
@@ -40,7 +40,7 @@ func TestDNS(t *testing.T) {
testCases := []struct {
name string
config SDConfig
- lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
+ lookup func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error)
expected []*targetgroup.Group
}{
@@ -52,8 +52,8 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "A",
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
- return nil, fmt.Errorf("some error")
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
+ return nil, errors.New("some error")
},
expected: []*targetgroup.Group{},
},
@@ -65,7 +65,7 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "A",
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.A{A: net.IPv4(192, 0, 2, 2)},
@@ -97,7 +97,7 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "AAAA",
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.AAAA{AAAA: net.IPv6loopback},
@@ -128,7 +128,7 @@ func TestDNS(t *testing.T) {
Type: "SRV",
RefreshInterval: model.Duration(time.Minute),
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
@@ -167,7 +167,7 @@ func TestDNS(t *testing.T) {
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
@@ -198,7 +198,7 @@ func TestDNS(t *testing.T) {
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{}, nil
},
expected: []*targetgroup.Group{
@@ -215,7 +215,7 @@ func TestDNS(t *testing.T) {
Port: 25,
RefreshInterval: model.Duration(time.Minute),
},
- lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
+ lookup: func(_ string, _ uint16, _ *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.MX{Preference: 0, Mx: "smtp1.example.com."},
diff --git a/discovery/eureka/client.go b/discovery/eureka/client.go
index 5a90968f1b..e4b54faae6 100644
--- a/discovery/eureka/client.go
+++ b/discovery/eureka/client.go
@@ -23,7 +23,7 @@ import (
"github.com/prometheus/common/version"
)
-var userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+var userAgent = version.PrometheusUserAgent()
type Applications struct {
VersionsDelta int `xml:"versions__delta"`
@@ -97,7 +97,6 @@ func fetchApps(ctx context.Context, server string, client *http.Client) (*Applic
resp.Body.Close()
}()
- //nolint:usestdlibvars
if resp.StatusCode/100 != 2 {
return nil, fmt.Errorf("non 2xx status '%d' response during eureka service discovery", resp.StatusCode)
}
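
// Illustrative sketch, not part of the diff: version.PrometheusUserAgent()
// replaces the hand-rolled fmt.Sprintf("Prometheus/%s", version.Version)
// string and is attached to outgoing requests much like the Eureka client's
// userAgent variable. The URL below is a placeholder.
package main

import (
	"fmt"
	"net/http"

	"github.com/prometheus/common/version"
)

func main() {
	req, err := http.NewRequest(http.MethodGet, "http://eureka.example.com/eureka/v2/apps", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Add("User-Agent", version.PrometheusUserAgent())
	fmt.Println(req.Header.Get("User-Agent")) // e.g. "Prometheus/<version>"
}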
diff --git a/discovery/eureka/client_test.go b/discovery/eureka/client_test.go
index 83f6fd5ff1..f85409a11e 100644
--- a/discovery/eureka/client_test.go
+++ b/discovery/eureka/client_test.go
@@ -172,7 +172,7 @@ func TestFetchApps(t *testing.T) {
`
// Simulate apps with a valid XML response.
- respHandler := func(w http.ResponseWriter, r *http.Request) {
+ respHandler := func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, appsXML)
@@ -199,7 +199,7 @@ func TestFetchApps(t *testing.T) {
func Test500ErrorHttpResponse(t *testing.T) {
// Simulate 500 error.
- respHandler := func(w http.ResponseWriter, r *http.Request) {
+ respHandler := func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, ``)
diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go
index 779c081aee..459b608e96 100644
--- a/discovery/eureka/eureka.go
+++ b/discovery/eureka/eureka.go
@@ -16,14 +16,13 @@ package eureka
import (
"context"
"errors"
- "fmt"
+ "log/slog"
"net"
"net/http"
"net/url"
"strconv"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -78,7 +77,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &eurekaMetrics{
refreshMetrics: rmi,
}
@@ -126,10 +125,10 @@ type Discovery struct {
}
// NewDiscovery creates a new Eureka discovery for the given role.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*eurekaMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd")
diff --git a/discovery/eureka/eureka_test.go b/discovery/eureka/eureka_test.go
index b499410bfc..5ea9a6c74e 100644
--- a/discovery/eureka/eureka_test.go
+++ b/discovery/eureka/eureka_test.go
@@ -58,7 +58,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
func TestEurekaSDHandleError(t *testing.T) {
var (
errTesting = "non 2xx status '500' response during eureka service discovery"
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, ``)
@@ -76,7 +76,7 @@ func TestEurekaSDEmptyList(t *testing.T) {
<versions__delta>1</versions__delta>
</applications>`
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, appsXML)
@@ -235,7 +235,7 @@ func TestEurekaSDSendGroup(t *testing.T) {
`
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, appsXML)
diff --git a/discovery/file/file.go b/discovery/file/file.go
index e7e9d0870f..beea03222b 100644
--- a/discovery/file/file.go
+++ b/discovery/file/file.go
@@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"os"
"path/filepath"
"strings"
@@ -26,12 +27,11 @@ import (
"time"
"github.com/fsnotify/fsnotify"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
@@ -175,20 +175,20 @@ type Discovery struct {
// and how many target groups they contained.
// This is used to detect deleted target groups.
lastRefresh map[string]int
- logger log.Logger
+ logger *slog.Logger
metrics *fileMetrics
}
// NewDiscovery returns a new file discovery for the given paths.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
fm, ok := metrics.(*fileMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
disc := &Discovery{
@@ -210,7 +210,7 @@ func (d *Discovery) listFiles() []string {
for _, p := range d.paths {
files, err := filepath.Glob(p)
if err != nil {
- level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err)
+ d.logger.Error("Error expanding glob", "glob", p, "err", err)
continue
}
paths = append(paths, files...)
@@ -231,7 +231,7 @@ func (d *Discovery) watchFiles() {
p = "./"
}
if err := d.watcher.Add(p); err != nil {
- level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err)
+ d.logger.Error("Error adding file watch", "path", p, "err", err)
}
}
}
@@ -240,7 +240,7 @@ func (d *Discovery) watchFiles() {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
watcher, err := fsnotify.NewWatcher()
if err != nil {
- level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err)
+ d.logger.Error("Error adding file watcher", "err", err)
d.metrics.fileWatcherErrorsCount.Inc()
return
}
@@ -280,7 +280,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
case err := <-d.watcher.Errors:
if err != nil {
- level.Error(d.logger).Log("msg", "Error watching file", "err", err)
+ d.logger.Error("Error watching file", "err", err)
}
}
}
@@ -300,7 +300,7 @@ func (d *Discovery) deleteTimestamp(filename string) {
// stop shuts down the file watcher.
func (d *Discovery) stop() {
- level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths))
+ d.logger.Debug("Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths))
done := make(chan struct{})
defer close(done)
@@ -320,10 +320,10 @@ func (d *Discovery) stop() {
}
}()
if err := d.watcher.Close(); err != nil {
- level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err)
+ d.logger.Error("Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err)
}
- level.Debug(d.logger).Log("msg", "File discovery stopped")
+ d.logger.Debug("File discovery stopped")
}
// refresh reads all files matching the discovery's patterns and sends the respective
@@ -339,7 +339,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group)
if err != nil {
d.metrics.fileSDReadErrorsCount.Inc()
- level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err)
+ d.logger.Error("Error reading file", "path", p, "err", err)
// Prevent deletion down below.
ref[p] = d.lastRefresh[p]
continue
@@ -356,7 +356,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group)
for f, n := range d.lastRefresh {
m, ok := ref[f]
if !ok || n > m {
- level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f)
+ d.logger.Debug("file_sd refresh found file that should be removed", "file", f)
d.deleteTimestamp(f)
for i := m; i < n; i++ {
select {
diff --git a/discovery/file/file_test.go b/discovery/file/file_test.go
index 179ac5cd1c..46b2ff0262 100644
--- a/discovery/file/file_test.go
+++ b/discovery/file/file_test.go
@@ -19,6 +19,7 @@ import (
"io"
"os"
"path/filepath"
+ "runtime"
"sort"
"sync"
"testing"
@@ -319,6 +320,9 @@ func valid2Tg(file string) []*targetgroup.Group {
}
func TestInitialUpdate(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/16212")
+ }
for _, tc := range []string{
"fixtures/valid.yml",
"fixtures/valid.json",
@@ -363,6 +367,9 @@ func TestInvalidFile(t *testing.T) {
}
func TestNoopFileUpdate(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/16212")
+ }
t.Parallel()
runner := newTestRunner(t)
@@ -381,6 +388,9 @@ func TestNoopFileUpdate(t *testing.T) {
}
func TestFileUpdate(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/16212")
+ }
t.Parallel()
runner := newTestRunner(t)
@@ -399,6 +409,9 @@ func TestFileUpdate(t *testing.T) {
}
func TestInvalidFileUpdate(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/16212")
+ }
t.Parallel()
runner := newTestRunner(t)
@@ -421,6 +434,9 @@ func TestInvalidFileUpdate(t *testing.T) {
}
func TestUpdateFileWithPartialWrites(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/16212")
+ }
t.Parallel()
runner := newTestRunner(t)
diff --git a/discovery/file/metrics.go b/discovery/file/metrics.go
index c01501e4ef..3e3df7bbf6 100644
--- a/discovery/file/metrics.go
+++ b/discovery/file/metrics.go
@@ -30,7 +30,7 @@ type fileMetrics struct {
metricRegisterer discovery.MetricRegisterer
}
-func newDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
fm := &fileMetrics{
fileSDReadErrorsCount: prometheus.NewCounter(
prometheus.CounterOpts{
diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go
index 15f32dd247..32f1bb6722 100644
--- a/discovery/gce/gce.go
+++ b/discovery/gce/gce.go
@@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net/http"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"golang.org/x/oauth2/google"
@@ -83,7 +83,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &gceMetrics{
refreshMetrics: rmi,
}
@@ -129,10 +129,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*gceMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go
index df56f94c5f..88fe09bd3e 100644
--- a/discovery/hetzner/hcloud.go
+++ b/discovery/hetzner/hcloud.go
@@ -15,12 +15,12 @@ package hetzner
import (
"context"
+ "log/slog"
"net"
"net/http"
"strconv"
"time"
- "github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -53,14 +53,16 @@ const (
// the Discoverer interface.
type hcloudDiscovery struct {
*refresh.Discovery
- client *hcloud.Client
- port int
+ client *hcloud.Client
+ port int
+ labelSelector string
}
// newHcloudDiscovery returns a new hcloudDiscovery which periodically refreshes its targets.
-func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error) {
+func newHcloudDiscovery(conf *SDConfig, _ *slog.Logger) (*hcloudDiscovery, error) {
d := &hcloudDiscovery{
- port: conf.Port,
+ port: conf.Port,
+ labelSelector: conf.LabelSelector,
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "hetzner_sd")
@@ -79,7 +81,10 @@ func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error)
}
func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
- servers, err := d.client.Server.All(ctx)
+ servers, err := d.client.Server.AllWithOpts(ctx, hcloud.ServerListOpts{ListOpts: hcloud.ListOpts{
+ PerPage: 50,
+ LabelSelector: d.labelSelector,
+ }})
if err != nil {
return nil, err
}
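
// Illustrative sketch, not part of the diff: the refresh above now pages
// through servers with AllWithOpts, applying the configured label selector.
// Token and selector values below are placeholders.
package main

import (
	"context"
	"fmt"

	"github.com/hetznercloud/hcloud-go/v2/hcloud"
)

func main() {
	client := hcloud.NewClient(hcloud.WithToken("dummy-token"))
	servers, err := client.Server.AllWithOpts(context.Background(), hcloud.ServerListOpts{
		ListOpts: hcloud.ListOpts{
			PerPage:       50,               // page size used by the discovery
			LabelSelector: "env=production", // hypothetical label selector
		},
	})
	if err != nil {
		fmt.Println("listing servers failed:", err)
		return
	}
	fmt.Println("servers found:", len(servers))
}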
diff --git a/discovery/hetzner/hcloud_test.go b/discovery/hetzner/hcloud_test.go
index 10b799037a..fa8291625a 100644
--- a/discovery/hetzner/hcloud_test.go
+++ b/discovery/hetzner/hcloud_test.go
@@ -18,8 +18,8 @@ import (
"fmt"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
)
@@ -43,7 +43,7 @@ func TestHCloudSDRefresh(t *testing.T) {
cfg.HTTPClientConfig.BearerToken = hcloudTestToken
cfg.hcloudEndpoint = suite.Mock.Endpoint()
- d, err := newHcloudDiscovery(&cfg, log.NewNopLogger())
+ d, err := newHcloudDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())
diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go
index 69c823d382..9245d933cc 100644
--- a/discovery/hetzner/hetzner.go
+++ b/discovery/hetzner/hetzner.go
@@ -17,9 +17,9 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"time"
- "github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -59,12 +59,15 @@ type SDConfig struct {
RefreshInterval model.Duration `yaml:"refresh_interval"`
Port int `yaml:"port"`
Role Role `yaml:"role"`
- hcloudEndpoint string // For tests only.
- robotEndpoint string // For tests only.
+
+ LabelSelector string `yaml:"label_selector,omitempty"`
+
+ hcloudEndpoint string // For tests only.
+ robotEndpoint string // For tests only.
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &hetznerMetrics{
refreshMetrics: rmi,
}
@@ -135,10 +138,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*hetznerMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
r, err := newRefresher(conf, logger)
@@ -157,7 +160,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
), nil
}
-func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
+func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) {
switch conf.Role {
case HetznerRoleHcloud:
if conf.hcloudEndpoint == "" {
diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go
index 64155bfaed..33aa2abcd8 100644
--- a/discovery/hetzner/robot.go
+++ b/discovery/hetzner/robot.go
@@ -18,13 +18,13 @@ import (
"encoding/json"
"fmt"
"io"
+ "log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@@ -39,7 +39,7 @@ const (
hetznerLabelRobotCancelled = hetznerRobotLabelPrefix + "cancelled"
)
-var userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+var userAgent = version.PrometheusUserAgent()
// Discovery periodically performs Hetzner Robot requests. It implements
// the Discoverer interface.
@@ -51,7 +51,7 @@ type robotDiscovery struct {
}
// newRobotDiscovery returns a new robotDiscovery which periodically refreshes its targets.
-func newRobotDiscovery(conf *SDConfig, _ log.Logger) (*robotDiscovery, error) {
+func newRobotDiscovery(conf *SDConfig, _ *slog.Logger) (*robotDiscovery, error) {
d := &robotDiscovery{
port: conf.Port,
endpoint: conf.robotEndpoint,
@@ -87,7 +87,6 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
resp.Body.Close()
}()
- //nolint:usestdlibvars
if resp.StatusCode/100 != 2 {
return nil, fmt.Errorf("non 2xx status '%d' response during hetzner service discovery with role robot", resp.StatusCode)
}
diff --git a/discovery/hetzner/robot_test.go b/discovery/hetzner/robot_test.go
index abee5fea90..2618bd097c 100644
--- a/discovery/hetzner/robot_test.go
+++ b/discovery/hetzner/robot_test.go
@@ -18,9 +18,9 @@ import (
"fmt"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
)
@@ -42,7 +42,7 @@ func TestRobotSDRefresh(t *testing.T) {
cfg.HTTPClientConfig.BasicAuth = &config.BasicAuth{Username: robotTestUsername, Password: robotTestPassword}
cfg.robotEndpoint = suite.Mock.Endpoint()
- d, err := newRobotDiscovery(&cfg, log.NewNopLogger())
+ d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())
@@ -91,12 +91,11 @@ func TestRobotSDRefreshHandleError(t *testing.T) {
cfg := DefaultSDConfig
cfg.robotEndpoint = suite.Mock.Endpoint()
- d, err := newRobotDiscovery(&cfg, log.NewNopLogger())
+ d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())
- require.Error(t, err)
- require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error())
+ require.EqualError(t, err, "non 2xx status '401' response during hetzner service discovery with role robot")
require.Empty(t, targetGroups)
}
diff --git a/discovery/http/http.go b/discovery/http/http.go
index ff76fd7627..ebc1c31f61 100644
--- a/discovery/http/http.go
+++ b/discovery/http/http.go
@@ -19,17 +19,18 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"net/http"
"net/url"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
@@ -40,10 +41,10 @@ import (
var (
// DefaultSDConfig is the default HTTP SD configuration.
DefaultSDConfig = SDConfig{
- RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
+ RefreshInterval: model.Duration(60 * time.Second),
}
- userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+ userAgent = version.PrometheusUserAgent()
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
)
@@ -85,17 +86,17 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.URL == "" {
- return fmt.Errorf("URL is missing")
+ return errors.New("URL is missing")
}
parsedURL, err := url.Parse(c.URL)
if err != nil {
return err
}
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
- return fmt.Errorf("URL scheme must be 'http' or 'https'")
+ return errors.New("URL scheme must be 'http' or 'https'")
}
if parsedURL.Host == "" {
- return fmt.Errorf("host is missing in URL")
+ return errors.New("host is missing in URL")
}
return c.HTTPClientConfig.Validate()
}
@@ -114,14 +115,14 @@ type Discovery struct {
}
// NewDiscovery returns a new HTTP discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*httpMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http", clientOpts...)
diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go
index 0cafe035dc..3af9e4e504 100644
--- a/discovery/http/http_test.go
+++ b/discovery/http/http_test.go
@@ -21,11 +21,11 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -49,7 +49,7 @@ func TestHTTPValidRefresh(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -75,7 +75,7 @@ func TestHTTPValidRefresh(t *testing.T) {
}
func TestHTTPInvalidCode(t *testing.T) {
- ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusBadRequest)
}))
@@ -94,7 +94,7 @@ func TestHTTPInvalidCode(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -104,7 +104,7 @@ func TestHTTPInvalidCode(t *testing.T) {
}
func TestHTTPInvalidFormat(t *testing.T) {
- ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintln(w, "{}")
}))
@@ -123,7 +123,7 @@ func TestHTTPInvalidFormat(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -212,7 +212,7 @@ func TestContentTypeRegex(t *testing.T) {
func TestSourceDisappeared(t *testing.T) {
var stubResponse string
- ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Content-Type", "application/json")
fmt.Fprintln(w, stubResponse)
}))
@@ -442,7 +442,7 @@ func TestSourceDisappeared(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
for _, test := range cases {
ctx := context.Background()
diff --git a/discovery/install/install.go b/discovery/install/install.go
index f090076b7f..9c397f9d36 100644
--- a/discovery/install/install.go
+++ b/discovery/install/install.go
@@ -36,6 +36,7 @@ import (
_ "github.com/prometheus/prometheus/discovery/ovhcloud" // register ovhcloud
_ "github.com/prometheus/prometheus/discovery/puppetdb" // register puppetdb
_ "github.com/prometheus/prometheus/discovery/scaleway" // register scaleway
+ _ "github.com/prometheus/prometheus/discovery/stackit" // register stackit
_ "github.com/prometheus/prometheus/discovery/triton" // register triton
_ "github.com/prometheus/prometheus/discovery/uyuni" // register uyuni
_ "github.com/prometheus/prometheus/discovery/vultr" // register vultr
diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go
index c8b4f7f8e5..475e6c30eb 100644
--- a/discovery/ionos/ionos.go
+++ b/discovery/ionos/ionos.go
@@ -15,10 +15,9 @@ package ionos
import (
"errors"
- "fmt"
+ "log/slog"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -43,10 +42,10 @@ func init() {
type Discovery struct{}
// NewDiscovery returns a new refresh.Discovery for IONOS Cloud.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*ionosMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if conf.ionosEndpoint == "" {
@@ -90,7 +89,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &ionosMetrics{
refreshMetrics: rmi,
}
diff --git a/discovery/ionos/server.go b/discovery/ionos/server.go
index a850fbbfb4..81bb497277 100644
--- a/discovery/ionos/server.go
+++ b/discovery/ionos/server.go
@@ -15,14 +15,13 @@ package ionos
import (
"context"
- "fmt"
+ "log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
ionoscloud "github.com/ionos-cloud/sdk-go/v6"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -60,7 +59,7 @@ type serverDiscovery struct {
datacenterID string
}
-func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error) {
+func newServerDiscovery(conf *SDConfig, _ *slog.Logger) (*serverDiscovery, error) {
d := &serverDiscovery{
port: conf.Port,
datacenterID: conf.DatacenterID,
@@ -77,7 +76,7 @@ func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error)
Transport: rt,
Timeout: time.Duration(conf.RefreshInterval),
}
- cfg.UserAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+ cfg.UserAgent = version.PrometheusUserAgent()
d.client = ionoscloud.NewAPIClient(cfg)
diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go
index c7a60ae6d3..c179779277 100644
--- a/discovery/kubernetes/endpoints.go
+++ b/discovery/kubernetes/endpoints.go
@@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@@ -33,7 +33,7 @@ import (
// Endpoints discovers new endpoint targets.
type Endpoints struct {
- logger log.Logger
+ logger *slog.Logger
endpointsInf cache.SharedIndexInformer
serviceInf cache.SharedInformer
@@ -49,9 +49,10 @@ type Endpoints struct {
}
// NewEndpoints returns a new endpoints discovery.
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
+// Endpoints API is deprecated in k8s v1.33+, but we should still support it.
+func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd)
@@ -92,26 +93,23 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding endpoints event handler.", "err", err)
+ l.Error("Error adding endpoints event handler.", "err", err)
}
serviceUpdate := func(o interface{}) {
svc, err := convertToService(o)
if err != nil {
- level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err)
+ e.logger.Error("converting to Service object failed", "err", err)
return
}
- ep := &apiv1.Endpoints{}
- ep.Namespace = svc.Namespace
- ep.Name = svc.Name
- obj, exists, err := e.endpointsStore.Get(ep)
+ obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name))
if exists && err == nil {
e.enqueue(obj.(*apiv1.Endpoints))
}
if err != nil {
- level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err)
+ e.logger.Error("retrieving endpoints failed", "err", err)
}
}
_, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -131,7 +129,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
+ l.Error("Error adding services event handler.", "err", err)
}
_, err = e.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: func(old, cur interface{}) {
@@ -154,7 +152,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
+ l.Error("Error adding pods event handler.", "err", err)
}
if e.withNodeMetadata {
_, err = e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -167,12 +165,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
e.enqueueNode(node.Name)
},
DeleteFunc: func(o interface{}) {
- node := o.(*apiv1.Node)
- e.enqueueNode(node.Name)
+ nodeName, err := nodeName(o)
+ if err != nil {
+ l.Error("Error getting Node name", "err", err)
+ }
+ e.enqueueNode(nodeName)
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
+ l.Error("Error adding nodes event handler.", "err", err)
}
}
@@ -182,7 +183,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
func (e *Endpoints) enqueueNode(nodeName string) {
endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
- level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err)
+ e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err)
return
}
@@ -194,7 +195,7 @@ func (e *Endpoints) enqueueNode(nodeName string) {
func (e *Endpoints) enqueuePod(podNamespacedName string) {
endpoints, err := e.endpointsInf.GetIndexer().ByIndex(podIndex, podNamespacedName)
if err != nil {
- level.Error(e.logger).Log("msg", "Error getting endpoints for pod", "pod", podNamespacedName, "err", err)
+ e.logger.Error("Error getting endpoints for pod", "pod", podNamespacedName, "err", err)
return
}
@@ -223,7 +224,7 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache")
+ e.logger.Error("endpoints informer unable to sync cache")
}
return
}
@@ -247,13 +248,13 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
- level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
+ e.logger.Error("splitting key failed", "key", key)
return true
}
o, exists, err := e.endpointsStore.GetByKey(key)
if err != nil {
- level.Error(e.logger).Log("msg", "getting object from store failed", "key", key)
+ e.logger.Error("getting object from store failed", "key", key)
return true
}
if !exists {
@@ -262,7 +263,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)
}
eps, err := convertToEndpoints(o)
if err != nil {
- level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err)
+ e.logger.Error("converting to Endpoints object failed", "err", err)
return true
}
send(ctx, ch, e.buildEndpoints(eps))
@@ -361,16 +362,19 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
target = target.Merge(podLabels(pod))
// Attach potential container port labels matching the endpoint port.
- for _, c := range pod.Spec.Containers {
+ containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
+ for i, c := range containers {
for _, cport := range c.Ports {
if port.Port == cport.ContainerPort {
ports := strconv.FormatUint(uint64(port.Port), 10)
+ isInit := i >= len(pod.Spec.Containers)
target[podContainerNameLabel] = lv(c.Name)
target[podContainerImageLabel] = lv(c.Image)
target[podContainerPortNameLabel] = lv(cport.Name)
target[podContainerPortNumberLabel] = lv(ports)
target[podContainerPortProtocolLabel] = lv(string(port.Protocol))
+ target[podContainerIsInit] = lv(strconv.FormatBool(isInit))
break
}
}
@@ -397,10 +401,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
v := eps.Labels[apiv1.EndpointsOverCapacity]
if v == "truncated" {
- level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
+ e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
}
if v == "warning" {
- level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
+ e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
}
// For all seen pods, check all container ports. If they were not covered
@@ -411,7 +415,8 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
continue
}
- for _, c := range pe.pod.Spec.Containers {
+ containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...)
+ for i, c := range containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
for _, eport := range pe.servicePorts {
@@ -428,6 +433,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
+ isInit := i >= len(pe.pod.Spec.Containers)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
@@ -435,6 +441,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
+ podContainerIsInit: lv(strconv.FormatBool(isInit)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
@@ -448,13 +455,10 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
if ref == nil || ref.Kind != "Pod" {
return nil
}
- p := &apiv1.Pod{}
- p.Namespace = ref.Namespace
- p.Name = ref.Name
- obj, exists, err := e.podStore.Get(p)
+ obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name))
if err != nil {
- level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
+ e.logger.Error("resolving pod ref failed", "err", err)
return nil
}
if !exists {
@@ -464,31 +468,27 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
}
func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) {
- svc := &apiv1.Service{}
- svc.Namespace = ns
- svc.Name = name
-
- obj, exists, err := e.serviceStore.Get(svc)
+ obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name))
if err != nil {
- level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
+ e.logger.Error("retrieving service failed", "err", err)
return
}
if !exists {
return
}
- svc = obj.(*apiv1.Service)
+ svc := obj.(*apiv1.Service)
tg.Labels = tg.Labels.Merge(serviceLabels(svc))
}
-func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet {
+func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger *slog.Logger, nodeName *string) model.LabelSet {
if nodeName == nil {
return tg
}
obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName)
if err != nil {
- level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err)
+ logger.Error("Error getting node", "node", *nodeName, "err", err)
return tg
}
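
// Illustrative sketch, not part of the diff: buildEndpoints now walks regular
// and init containers in a single slice and derives the new
// __meta_kubernetes_pod_container_init label from the index, roughly like this.
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

func main() {
	pod := apiv1.Pod{
		Spec: apiv1.PodSpec{
			Containers:     []apiv1.Container{{Name: "c1", Image: "c1:latest"}},
			InitContainers: []apiv1.Container{{Name: "ic1", Image: "ic1:latest"}, {Name: "ic2", Image: "ic2:latest"}},
		},
	}
	containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
	for i, c := range containers {
		// Init (sidecar) containers come after the regular ones in the slice.
		isInit := i >= len(pod.Spec.Containers)
		fmt.Printf("container=%s image=%s init=%t\n", c.Name, c.Image, isInit)
	}
}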
diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go
index e877657dba..28ad5697bc 100644
--- a/discovery/kubernetes/endpoints_test.go
+++ b/discovery/kubernetes/endpoints_test.go
@@ -18,10 +18,12 @@ import (
"testing"
"github.com/prometheus/common/model"
+ "github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
+ "k8s.io/client-go/tools/cache"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@@ -95,6 +97,7 @@ func makeEndpoints() *v1.Endpoints {
}
func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -149,6 +152,7 @@ func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
}
func TestEndpointsDiscoveryAdd(t *testing.T) {
+ t.Parallel()
obj := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
@@ -244,6 +248,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4:9001",
@@ -259,6 +264,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9001",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@@ -272,6 +278,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
}
func TestEndpointsDiscoveryDelete(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{
@@ -290,6 +297,7 @@ func TestEndpointsDiscoveryDelete(t *testing.T) {
}
func TestEndpointsDiscoveryUpdate(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{
@@ -361,6 +369,7 @@ func TestEndpointsDiscoveryUpdate(t *testing.T) {
}
func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{
@@ -389,6 +398,7 @@ func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
}
func TestEndpointsDiscoveryWithService(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{
@@ -454,6 +464,7 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
}
func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{
@@ -534,6 +545,7 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
}
func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
+ t.Parallel()
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
@@ -607,6 +619,7 @@ func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
}
func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
+ t.Parallel()
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
node1 := makeNode("foobar", "", "", nodeLabels1, nil)
@@ -684,6 +697,7 @@ func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
}
func TestEndpointsDiscoveryNamespaces(t *testing.T) {
+ t.Parallel()
epOne := makeEndpoints()
epOne.Namespace = "ns1"
objs := []runtime.Object{
@@ -821,6 +835,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@@ -834,6 +849,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
}
func TestEndpointsDiscoveryOwnNamespace(t *testing.T) {
+ t.Parallel()
epOne := makeEndpoints()
epOne.Namespace = "own-ns"
@@ -928,6 +944,7 @@ func TestEndpointsDiscoveryOwnNamespace(t *testing.T) {
}
func TestEndpointsDiscoveryEmptyPodStatus(t *testing.T) {
+ t.Parallel()
ep := makeEndpoints()
ep.Namespace = "ns"
@@ -970,9 +987,10 @@ func TestEndpointsDiscoveryEmptyPodStatus(t *testing.T) {
}.Run(t)
}
-// TestEndpointsUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
+// TestEndpointsDiscoveryUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
// See https://github.com/prometheus/prometheus/issues/11305 for more details.
func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
+ t.Parallel()
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
@@ -1078,6 +1096,7 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@@ -1089,3 +1108,187 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
},
}.Run(t)
}
+
+func TestEndpointsDiscoverySidecarContainer(t *testing.T) {
+ t.Parallel()
+ objs := []runtime.Object{
+ &v1.Endpoints{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "testsidecar",
+ Namespace: "default",
+ },
+ Subsets: []v1.EndpointSubset{
+ {
+ Addresses: []v1.EndpointAddress{
+ {
+ IP: "4.3.2.1",
+ TargetRef: &v1.ObjectReference{
+ Kind: "Pod",
+ Name: "testpod",
+ Namespace: "default",
+ },
+ },
+ },
+ Ports: []v1.EndpointPort{
+ {
+ Name: "testport",
+ Port: 9000,
+ Protocol: v1.ProtocolTCP,
+ },
+ {
+ Name: "initport",
+ Port: 9111,
+ Protocol: v1.ProtocolTCP,
+ },
+ },
+ },
+ },
+ },
+ &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "testpod",
+ Namespace: "default",
+ UID: types.UID("deadbeef"),
+ },
+ Spec: v1.PodSpec{
+ NodeName: "testnode",
+ InitContainers: []v1.Container{
+ {
+ Name: "ic1",
+ Image: "ic1:latest",
+ Ports: []v1.ContainerPort{
+ {
+ Name: "initport",
+ ContainerPort: 1111,
+ Protocol: v1.ProtocolTCP,
+ },
+ },
+ },
+ {
+ Name: "ic2",
+ Image: "ic2:latest",
+ Ports: []v1.ContainerPort{
+ {
+ Name: "initport",
+ ContainerPort: 9111,
+ Protocol: v1.ProtocolTCP,
+ },
+ },
+ },
+ },
+ Containers: []v1.Container{
+ {
+ Name: "c1",
+ Image: "c1:latest",
+ Ports: []v1.ContainerPort{
+ {
+ Name: "mainport",
+ ContainerPort: 9000,
+ Protocol: v1.ProtocolTCP,
+ },
+ },
+ },
+ },
+ },
+ Status: v1.PodStatus{
+ HostIP: "2.3.4.5",
+ PodIP: "4.3.2.1",
+ },
+ },
+ }
+
+ n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, objs...)
+
+ k8sDiscoveryTest{
+ discovery: n,
+ expectedMaxItems: 1,
+ expectedRes: map[string]*targetgroup.Group{
+ "endpoints/default/testsidecar": {
+ Targets: []model.LabelSet{
+ {
+ "__address__": "4.3.2.1:9000",
+ "__meta_kubernetes_endpoint_address_target_kind": "Pod",
+ "__meta_kubernetes_endpoint_address_target_name": "testpod",
+ "__meta_kubernetes_endpoint_port_name": "testport",
+ "__meta_kubernetes_endpoint_port_protocol": "TCP",
+ "__meta_kubernetes_endpoint_ready": "true",
+ "__meta_kubernetes_pod_container_image": "c1:latest",
+ "__meta_kubernetes_pod_container_name": "c1",
+ "__meta_kubernetes_pod_container_port_name": "mainport",
+ "__meta_kubernetes_pod_container_port_number": "9000",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
+ },
+ {
+ "__address__": "4.3.2.1:9111",
+ "__meta_kubernetes_endpoint_address_target_kind": "Pod",
+ "__meta_kubernetes_endpoint_address_target_name": "testpod",
+ "__meta_kubernetes_endpoint_port_name": "initport",
+ "__meta_kubernetes_endpoint_port_protocol": "TCP",
+ "__meta_kubernetes_endpoint_ready": "true",
+ "__meta_kubernetes_pod_container_image": "ic2:latest",
+ "__meta_kubernetes_pod_container_name": "ic2",
+ "__meta_kubernetes_pod_container_port_name": "initport",
+ "__meta_kubernetes_pod_container_port_number": "9111",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "true",
+ },
+ {
+ "__address__": "4.3.2.1:1111",
+ "__meta_kubernetes_pod_container_image": "ic1:latest",
+ "__meta_kubernetes_pod_container_name": "ic1",
+ "__meta_kubernetes_pod_container_port_name": "initport",
+ "__meta_kubernetes_pod_container_port_number": "1111",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "true",
+ },
+ },
+ Labels: model.LabelSet{
+ "__meta_kubernetes_endpoints_name": "testsidecar",
+ "__meta_kubernetes_namespace": "default",
+ },
+ Source: "endpoints/default/testsidecar",
+ },
+ },
+ }.Run(t)
+}
+
+func BenchmarkResolvePodRef(b *testing.B) {
+ indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil)
+ e := &Endpoints{
+ podStore: indexer,
+ }
+
+ b.ReportAllocs()
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ p := e.resolvePodRef(&v1.ObjectReference{
+ Kind: "Pod",
+ Name: "testpod",
+ Namespace: "foo",
+ })
+ require.Nil(b, p)
+ }
+}
diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go
index 7a70255c12..625601abc1 100644
--- a/discovery/kubernetes/endpointslice.go
+++ b/discovery/kubernetes/endpointslice.go
@@ -16,17 +16,15 @@ package kubernetes
import (
"context"
"errors"
- "fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
- "k8s.io/api/discovery/v1beta1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@@ -34,9 +32,11 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
+const serviceIndex = "service"
+
// EndpointSlice discovers new endpoint targets.
type EndpointSlice struct {
- logger log.Logger
+ logger *slog.Logger
endpointSliceInf cache.SharedIndexInformer
serviceInf cache.SharedInformer
@@ -52,9 +52,9 @@ type EndpointSlice struct {
}
// NewEndpointSlice returns a new endpointslice discovery.
-func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
+func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd)
@@ -93,28 +93,24 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding endpoint slices event handler.", "err", err)
+ l.Error("Error adding endpoint slices event handler.", "err", err)
}
serviceUpdate := func(o interface{}) {
svc, err := convertToService(o)
if err != nil {
- level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err)
+ e.logger.Error("converting to Service object failed", "err", err)
return
}
- // TODO(brancz): use cache.Indexer to index endpoints by
- // disv1beta1.LabelServiceName so this operation doesn't have to
- // iterate over all endpoint objects.
- for _, obj := range e.endpointSliceStore.List() {
- esa, err := e.getEndpointSliceAdaptor(obj)
- if err != nil {
- level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err)
- continue
- }
- if lv, exists := esa.labels()[esa.labelServiceName()]; exists && lv == svc.Name {
- e.enqueue(esa.get())
- }
+ endpointSlices, err := e.endpointSliceInf.GetIndexer().ByIndex(serviceIndex, namespacedName(svc.Namespace, svc.Name))
+ if err != nil {
+ e.logger.Error("getting endpoint slices by service name failed", "err", err)
+ return
+ }
+
+ for _, endpointSlice := range endpointSlices {
+ e.enqueue(endpointSlice)
}
}
_, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -132,7 +128,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
+ l.Error("Error adding services event handler.", "err", err)
}
if e.withNodeMetadata {
@@ -146,12 +142,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
e.enqueueNode(node.Name)
},
DeleteFunc: func(o interface{}) {
- node := o.(*apiv1.Node)
- e.enqueueNode(node.Name)
+ nodeName, err := nodeName(o)
+ if err != nil {
+ l.Error("Error getting Node name", "err", err)
+ }
+ e.enqueueNode(nodeName)
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
+ l.Error("Error adding nodes event handler.", "err", err)
}
}
@@ -161,7 +160,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
func (e *EndpointSlice) enqueueNode(nodeName string) {
endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
- level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err)
+ e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err)
return
}
@@ -189,7 +188,7 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group)
}
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache")
+ e.logger.Error("endpointslice informer unable to sync cache")
}
return
}
@@ -213,13 +212,13 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
- level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
+ e.logger.Error("splitting key failed", "key", key)
return true
}
o, exists, err := e.endpointSliceStore.GetByKey(key)
if err != nil {
- level.Error(e.logger).Log("msg", "getting object from store failed", "key", key)
+ e.logger.Error("getting object from store failed", "key", key)
return true
}
if !exists {
@@ -227,29 +226,17 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr
return true
}
- esa, err := e.getEndpointSliceAdaptor(o)
- if err != nil {
- level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err)
- return true
+ if es, ok := o.(*v1.EndpointSlice); ok {
+ send(ctx, ch, e.buildEndpointSlice(*es))
+ } else {
+ e.logger.Error("received unexpected object", "object", o)
+ return false
}
-
- send(ctx, ch, e.buildEndpointSlice(esa))
return true
}
-func (e *EndpointSlice) getEndpointSliceAdaptor(o interface{}) (endpointSliceAdaptor, error) {
- switch endpointSlice := o.(type) {
- case *v1.EndpointSlice:
- return newEndpointSliceAdaptorFromV1(endpointSlice), nil
- case *v1beta1.EndpointSlice:
- return newEndpointSliceAdaptorFromV1beta1(endpointSlice), nil
- default:
- return nil, fmt.Errorf("received unexpected object: %v", o)
- }
-}
-
-func endpointSliceSource(ep endpointSliceAdaptor) string {
- return endpointSliceSourceFromNamespaceAndName(ep.namespace(), ep.name())
+func endpointSliceSource(ep v1.EndpointSlice) string {
+ return endpointSliceSourceFromNamespaceAndName(ep.Namespace, ep.Name)
}
func endpointSliceSourceFromNamespaceAndName(namespace, name string) string {
@@ -274,95 +261,95 @@ const (
endpointSliceEndpointTopologyLabelPresentPrefix = metaLabelPrefix + "endpointslice_endpoint_topology_present_"
)
-func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgroup.Group {
+func (e *EndpointSlice) buildEndpointSlice(eps v1.EndpointSlice) *targetgroup.Group {
tg := &targetgroup.Group{
Source: endpointSliceSource(eps),
}
tg.Labels = model.LabelSet{
- namespaceLabel: lv(eps.namespace()),
- endpointSliceAddressTypeLabel: lv(eps.addressType()),
+ namespaceLabel: lv(eps.Namespace),
+ endpointSliceAddressTypeLabel: lv(string(eps.AddressType)),
}
- addObjectMetaLabels(tg.Labels, eps.getObjectMeta(), RoleEndpointSlice)
+ addObjectMetaLabels(tg.Labels, eps.ObjectMeta, RoleEndpointSlice)
e.addServiceLabels(eps, tg)
type podEntry struct {
pod *apiv1.Pod
- servicePorts []endpointSlicePortAdaptor
+ servicePorts []v1.EndpointPort
}
seenPods := map[string]*podEntry{}
- add := func(addr string, ep endpointSliceEndpointAdaptor, port endpointSlicePortAdaptor) {
+ add := func(addr string, ep v1.Endpoint, port v1.EndpointPort) {
a := addr
- if port.port() != nil {
- a = net.JoinHostPort(addr, strconv.FormatUint(uint64(*port.port()), 10))
+ if port.Port != nil {
+ a = net.JoinHostPort(addr, strconv.FormatUint(uint64(*port.Port), 10))
}
target := model.LabelSet{
model.AddressLabel: lv(a),
}
- if port.name() != nil {
- target[endpointSlicePortNameLabel] = lv(*port.name())
+ if port.Name != nil {
+ target[endpointSlicePortNameLabel] = lv(*port.Name)
}
- if port.protocol() != nil {
- target[endpointSlicePortProtocolLabel] = lv(*port.protocol())
+ if port.Protocol != nil {
+ target[endpointSlicePortProtocolLabel] = lv(string(*port.Protocol))
}
- if port.port() != nil {
- target[endpointSlicePortLabel] = lv(strconv.FormatUint(uint64(*port.port()), 10))
+ if port.Port != nil {
+ target[endpointSlicePortLabel] = lv(strconv.FormatUint(uint64(*port.Port), 10))
}
- if port.appProtocol() != nil {
- target[endpointSlicePortAppProtocol] = lv(*port.appProtocol())
+ if port.AppProtocol != nil {
+ target[endpointSlicePortAppProtocol] = lv(*port.AppProtocol)
}
- if ep.conditions().ready() != nil {
- target[endpointSliceEndpointConditionsReadyLabel] = lv(strconv.FormatBool(*ep.conditions().ready()))
+ if ep.Conditions.Ready != nil {
+ target[endpointSliceEndpointConditionsReadyLabel] = lv(strconv.FormatBool(*ep.Conditions.Ready))
}
- if ep.conditions().serving() != nil {
- target[endpointSliceEndpointConditionsServingLabel] = lv(strconv.FormatBool(*ep.conditions().serving()))
+ if ep.Conditions.Serving != nil {
+ target[endpointSliceEndpointConditionsServingLabel] = lv(strconv.FormatBool(*ep.Conditions.Serving))
}
- if ep.conditions().terminating() != nil {
- target[endpointSliceEndpointConditionsTerminatingLabel] = lv(strconv.FormatBool(*ep.conditions().terminating()))
+ if ep.Conditions.Terminating != nil {
+ target[endpointSliceEndpointConditionsTerminatingLabel] = lv(strconv.FormatBool(*ep.Conditions.Terminating))
}
- if ep.hostname() != nil {
- target[endpointSliceEndpointHostnameLabel] = lv(*ep.hostname())
+ if ep.Hostname != nil {
+ target[endpointSliceEndpointHostnameLabel] = lv(*ep.Hostname)
}
- if ep.targetRef() != nil {
- target[model.LabelName(endpointSliceAddressTargetKindLabel)] = lv(ep.targetRef().Kind)
- target[model.LabelName(endpointSliceAddressTargetNameLabel)] = lv(ep.targetRef().Name)
+ if ep.TargetRef != nil {
+ target[model.LabelName(endpointSliceAddressTargetKindLabel)] = lv(ep.TargetRef.Kind)
+ target[model.LabelName(endpointSliceAddressTargetNameLabel)] = lv(ep.TargetRef.Name)
}
- if ep.nodename() != nil {
- target[endpointSliceEndpointNodenameLabel] = lv(*ep.nodename())
+ if ep.NodeName != nil {
+ target[endpointSliceEndpointNodenameLabel] = lv(*ep.NodeName)
}
- if ep.zone() != nil {
- target[model.LabelName(endpointSliceEndpointZoneLabel)] = lv(*ep.zone())
+ if ep.Zone != nil {
+ target[model.LabelName(endpointSliceEndpointZoneLabel)] = lv(*ep.Zone)
}
- for k, v := range ep.topology() {
+ for k, v := range ep.DeprecatedTopology {
ln := strutil.SanitizeLabelName(k)
target[model.LabelName(endpointSliceEndpointTopologyLabelPrefix+ln)] = lv(v)
target[model.LabelName(endpointSliceEndpointTopologyLabelPresentPrefix+ln)] = presentValue
}
if e.withNodeMetadata {
- if ep.targetRef() != nil && ep.targetRef().Kind == "Node" {
- target = addNodeLabels(target, e.nodeInf, e.logger, &ep.targetRef().Name)
+ if ep.TargetRef != nil && ep.TargetRef.Kind == "Node" {
+ target = addNodeLabels(target, e.nodeInf, e.logger, &ep.TargetRef.Name)
} else {
- target = addNodeLabels(target, e.nodeInf, e.logger, ep.nodename())
+ target = addNodeLabels(target, e.nodeInf, e.logger, ep.NodeName)
}
}
- pod := e.resolvePodRef(ep.targetRef())
+ pod := e.resolvePodRef(ep.TargetRef)
if pod == nil {
// This target is not a Pod, so don't continue with Pod specific logic.
tg.Targets = append(tg.Targets, target)
@@ -380,19 +367,23 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
target = target.Merge(podLabels(pod))
// Attach potential container port labels matching the endpoint port.
- for _, c := range pod.Spec.Containers {
+ containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
+ for i, c := range containers {
for _, cport := range c.Ports {
- if port.port() == nil {
+ if port.Port == nil {
continue
}
- if *port.port() == cport.ContainerPort {
- ports := strconv.FormatUint(uint64(*port.port()), 10)
+
+ if *port.Port == cport.ContainerPort {
+ ports := strconv.FormatUint(uint64(*port.Port), 10)
+ isInit := i >= len(pod.Spec.Containers)
target[podContainerNameLabel] = lv(c.Name)
target[podContainerImageLabel] = lv(c.Image)
target[podContainerPortNameLabel] = lv(cport.Name)
target[podContainerPortNumberLabel] = lv(ports)
target[podContainerPortProtocolLabel] = lv(string(cport.Protocol))
+ target[podContainerIsInit] = lv(strconv.FormatBool(isInit))
break
}
}
@@ -404,9 +395,9 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
tg.Targets = append(tg.Targets, target)
}
- for _, ep := range eps.endpoints() {
- for _, port := range eps.ports() {
- for _, addr := range ep.addresses() {
+ for _, ep := range eps.Endpoints {
+ for _, port := range eps.Ports {
+ for _, addr := range ep.Addresses {
add(addr, ep, port)
}
}
@@ -420,14 +411,15 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
continue
}
- for _, c := range pe.pod.Spec.Containers {
+ containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...)
+ for i, c := range containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
for _, eport := range pe.servicePorts {
- if eport.port() == nil {
+ if eport.Port == nil {
continue
}
- if cport.ContainerPort == *eport.port() {
+ if cport.ContainerPort == *eport.Port {
return true
}
}
@@ -440,6 +432,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
+ isInit := i >= len(pe.pod.Spec.Containers)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
@@ -447,6 +440,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
+ podContainerIsInit: lv(strconv.FormatBool(isInit)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
@@ -460,13 +454,10 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
if ref == nil || ref.Kind != "Pod" {
return nil
}
- p := &apiv1.Pod{}
- p.Namespace = ref.Namespace
- p.Name = ref.Name
- obj, exists, err := e.podStore.Get(p)
+ obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name))
if err != nil {
- level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
+ e.logger.Error("resolving pod ref failed", "err", err)
return nil
}
if !exists {
@@ -475,29 +466,29 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
return obj.(*apiv1.Pod)
}
-func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) {
+func (e *EndpointSlice) addServiceLabels(esa v1.EndpointSlice, tg *targetgroup.Group) {
var (
- svc = &apiv1.Service{}
found bool
+ name string
)
- svc.Namespace = esa.namespace()
+ ns := esa.Namespace
// Every EndpointSlice object has the Service they belong to in the
// kubernetes.io/service-name label.
- svc.Name, found = esa.labels()[esa.labelServiceName()]
+ name, found = esa.Labels[v1.LabelServiceName]
if !found {
return
}
- obj, exists, err := e.serviceStore.Get(svc)
+ obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name))
if err != nil {
- level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
+ e.logger.Error("retrieving service failed", "err", err)
return
}
if !exists {
return
}
- svc = obj.(*apiv1.Service)
+ svc := obj.(*apiv1.Service)
tg.Labels = tg.Labels.Merge(serviceLabels(svc))
}
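
The ByIndex lookup introduced in the serviceUpdate handler relies on a "service" index being registered on the EndpointSlice informer; that registration happens in the discovery setup code outside this hunk. A sketch of what such an index function could look like, assuming it keys each slice by its kubernetes.io/service-name label in namespace/name form and is registered under the serviceIndex constant added by this patch:

package kubernetes

import (
	v1 "k8s.io/api/discovery/v1"
	"k8s.io/client-go/tools/cache"
)

// serviceIndexFunc maps an EndpointSlice to the namespaced name of the
// Service it belongs to, so slices can later be fetched with
// GetIndexer().ByIndex(serviceIndex, "<namespace>/<service>").
func serviceIndexFunc(obj interface{}) ([]string, error) {
	es, ok := obj.(*v1.EndpointSlice)
	if !ok {
		return nil, nil
	}
	svcName, found := es.Labels[v1.LabelServiceName]
	if !found {
		return nil, nil
	}
	return []string{es.Namespace + "/" + svcName}, nil
}

// registerServiceIndexer wires the index onto the shared informer under the
// serviceIndex constant introduced earlier in this file.
func registerServiceIndexer(inf cache.SharedIndexInformer) error {
	return inf.AddIndexers(cache.Indexers{serviceIndex: serviceIndexFunc})
}

With the index in place, a Service update touches only the slices that actually belong to it, replacing the removed TODO's full scan of the endpoint slice store.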
diff --git a/discovery/kubernetes/endpointslice_adaptor.go b/discovery/kubernetes/endpointslice_adaptor.go
deleted file mode 100644
index edd64fcb32..0000000000
--- a/discovery/kubernetes/endpointslice_adaptor.go
+++ /dev/null
@@ -1,325 +0,0 @@
-// Copyright 2020 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kubernetes
-
-import (
- corev1 "k8s.io/api/core/v1"
- v1 "k8s.io/api/discovery/v1"
- "k8s.io/api/discovery/v1beta1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-// endpointSliceAdaptor is an adaptor for the different EndpointSlice versions.
-type endpointSliceAdaptor interface {
- get() interface{}
- getObjectMeta() metav1.ObjectMeta
- name() string
- namespace() string
- addressType() string
- endpoints() []endpointSliceEndpointAdaptor
- ports() []endpointSlicePortAdaptor
- labels() map[string]string
- labelServiceName() string
-}
-
-type endpointSlicePortAdaptor interface {
- name() *string
- port() *int32
- protocol() *string
- appProtocol() *string
-}
-
-type endpointSliceEndpointAdaptor interface {
- addresses() []string
- hostname() *string
- nodename() *string
- zone() *string
- conditions() endpointSliceEndpointConditionsAdaptor
- targetRef() *corev1.ObjectReference
- topology() map[string]string
-}
-
-type endpointSliceEndpointConditionsAdaptor interface {
- ready() *bool
- serving() *bool
- terminating() *bool
-}
-
-// Adaptor for k8s.io/api/discovery/v1.
-type endpointSliceAdaptorV1 struct {
- endpointSlice *v1.EndpointSlice
-}
-
-func newEndpointSliceAdaptorFromV1(endpointSlice *v1.EndpointSlice) endpointSliceAdaptor {
- return &endpointSliceAdaptorV1{endpointSlice: endpointSlice}
-}
-
-func (e *endpointSliceAdaptorV1) get() interface{} {
- return e.endpointSlice
-}
-
-func (e *endpointSliceAdaptorV1) getObjectMeta() metav1.ObjectMeta {
- return e.endpointSlice.ObjectMeta
-}
-
-func (e *endpointSliceAdaptorV1) name() string {
- return e.endpointSlice.ObjectMeta.Name
-}
-
-func (e *endpointSliceAdaptorV1) namespace() string {
- return e.endpointSlice.ObjectMeta.Namespace
-}
-
-func (e *endpointSliceAdaptorV1) addressType() string {
- return string(e.endpointSlice.AddressType)
-}
-
-func (e *endpointSliceAdaptorV1) endpoints() []endpointSliceEndpointAdaptor {
- eps := make([]endpointSliceEndpointAdaptor, 0, len(e.endpointSlice.Endpoints))
- for i := 0; i < len(e.endpointSlice.Endpoints); i++ {
- eps = append(eps, newEndpointSliceEndpointAdaptorFromV1(e.endpointSlice.Endpoints[i]))
- }
- return eps
-}
-
-func (e *endpointSliceAdaptorV1) ports() []endpointSlicePortAdaptor {
- ports := make([]endpointSlicePortAdaptor, 0, len(e.endpointSlice.Ports))
- for i := 0; i < len(e.endpointSlice.Ports); i++ {
- ports = append(ports, newEndpointSlicePortAdaptorFromV1(e.endpointSlice.Ports[i]))
- }
- return ports
-}
-
-func (e *endpointSliceAdaptorV1) labels() map[string]string {
- return e.endpointSlice.Labels
-}
-
-func (e *endpointSliceAdaptorV1) labelServiceName() string {
- return v1.LabelServiceName
-}
-
-// Adaptor for k8s.io/api/discovery/v1beta1.
-type endpointSliceAdaptorV1Beta1 struct {
- endpointSlice *v1beta1.EndpointSlice
-}
-
-func newEndpointSliceAdaptorFromV1beta1(endpointSlice *v1beta1.EndpointSlice) endpointSliceAdaptor {
- return &endpointSliceAdaptorV1Beta1{endpointSlice: endpointSlice}
-}
-
-func (e *endpointSliceAdaptorV1Beta1) get() interface{} {
- return e.endpointSlice
-}
-
-func (e *endpointSliceAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta {
- return e.endpointSlice.ObjectMeta
-}
-
-func (e *endpointSliceAdaptorV1Beta1) name() string {
- return e.endpointSlice.Name
-}
-
-func (e *endpointSliceAdaptorV1Beta1) namespace() string {
- return e.endpointSlice.Namespace
-}
-
-func (e *endpointSliceAdaptorV1Beta1) addressType() string {
- return string(e.endpointSlice.AddressType)
-}
-
-func (e *endpointSliceAdaptorV1Beta1) endpoints() []endpointSliceEndpointAdaptor {
- eps := make([]endpointSliceEndpointAdaptor, 0, len(e.endpointSlice.Endpoints))
- for i := 0; i < len(e.endpointSlice.Endpoints); i++ {
- eps = append(eps, newEndpointSliceEndpointAdaptorFromV1beta1(e.endpointSlice.Endpoints[i]))
- }
- return eps
-}
-
-func (e *endpointSliceAdaptorV1Beta1) ports() []endpointSlicePortAdaptor {
- ports := make([]endpointSlicePortAdaptor, 0, len(e.endpointSlice.Ports))
- for i := 0; i < len(e.endpointSlice.Ports); i++ {
- ports = append(ports, newEndpointSlicePortAdaptorFromV1beta1(e.endpointSlice.Ports[i]))
- }
- return ports
-}
-
-func (e *endpointSliceAdaptorV1Beta1) labels() map[string]string {
- return e.endpointSlice.Labels
-}
-
-func (e *endpointSliceAdaptorV1Beta1) labelServiceName() string {
- return v1beta1.LabelServiceName
-}
-
-type endpointSliceEndpointAdaptorV1 struct {
- endpoint v1.Endpoint
-}
-
-func newEndpointSliceEndpointAdaptorFromV1(endpoint v1.Endpoint) endpointSliceEndpointAdaptor {
- return &endpointSliceEndpointAdaptorV1{endpoint: endpoint}
-}
-
-func (e *endpointSliceEndpointAdaptorV1) addresses() []string {
- return e.endpoint.Addresses
-}
-
-func (e *endpointSliceEndpointAdaptorV1) hostname() *string {
- return e.endpoint.Hostname
-}
-
-func (e *endpointSliceEndpointAdaptorV1) nodename() *string {
- return e.endpoint.NodeName
-}
-
-func (e *endpointSliceEndpointAdaptorV1) zone() *string {
- return e.endpoint.Zone
-}
-
-func (e *endpointSliceEndpointAdaptorV1) conditions() endpointSliceEndpointConditionsAdaptor {
- return newEndpointSliceEndpointConditionsAdaptorFromV1(e.endpoint.Conditions)
-}
-
-func (e *endpointSliceEndpointAdaptorV1) targetRef() *corev1.ObjectReference {
- return e.endpoint.TargetRef
-}
-
-func (e *endpointSliceEndpointAdaptorV1) topology() map[string]string {
- return e.endpoint.DeprecatedTopology
-}
-
-type endpointSliceEndpointConditionsAdaptorV1 struct {
- endpointConditions v1.EndpointConditions
-}
-
-func newEndpointSliceEndpointConditionsAdaptorFromV1(endpointConditions v1.EndpointConditions) endpointSliceEndpointConditionsAdaptor {
- return &endpointSliceEndpointConditionsAdaptorV1{endpointConditions: endpointConditions}
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1) ready() *bool {
- return e.endpointConditions.Ready
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1) serving() *bool {
- return e.endpointConditions.Serving
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1) terminating() *bool {
- return e.endpointConditions.Terminating
-}
-
-type endpointSliceEndpointAdaptorV1beta1 struct {
- endpoint v1beta1.Endpoint
-}
-
-func newEndpointSliceEndpointAdaptorFromV1beta1(endpoint v1beta1.Endpoint) endpointSliceEndpointAdaptor {
- return &endpointSliceEndpointAdaptorV1beta1{endpoint: endpoint}
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) addresses() []string {
- return e.endpoint.Addresses
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) hostname() *string {
- return e.endpoint.Hostname
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) nodename() *string {
- return e.endpoint.NodeName
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) zone() *string {
- return nil
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) conditions() endpointSliceEndpointConditionsAdaptor {
- return newEndpointSliceEndpointConditionsAdaptorFromV1beta1(e.endpoint.Conditions)
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) targetRef() *corev1.ObjectReference {
- return e.endpoint.TargetRef
-}
-
-func (e *endpointSliceEndpointAdaptorV1beta1) topology() map[string]string {
- return e.endpoint.Topology
-}
-
-type endpointSliceEndpointConditionsAdaptorV1beta1 struct {
- endpointConditions v1beta1.EndpointConditions
-}
-
-func newEndpointSliceEndpointConditionsAdaptorFromV1beta1(endpointConditions v1beta1.EndpointConditions) endpointSliceEndpointConditionsAdaptor {
- return &endpointSliceEndpointConditionsAdaptorV1beta1{endpointConditions: endpointConditions}
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1beta1) ready() *bool {
- return e.endpointConditions.Ready
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1beta1) serving() *bool {
- return e.endpointConditions.Serving
-}
-
-func (e *endpointSliceEndpointConditionsAdaptorV1beta1) terminating() *bool {
- return e.endpointConditions.Terminating
-}
-
-type endpointSlicePortAdaptorV1 struct {
- endpointPort v1.EndpointPort
-}
-
-func newEndpointSlicePortAdaptorFromV1(port v1.EndpointPort) endpointSlicePortAdaptor {
- return &endpointSlicePortAdaptorV1{endpointPort: port}
-}
-
-func (e *endpointSlicePortAdaptorV1) name() *string {
- return e.endpointPort.Name
-}
-
-func (e *endpointSlicePortAdaptorV1) port() *int32 {
- return e.endpointPort.Port
-}
-
-func (e *endpointSlicePortAdaptorV1) protocol() *string {
- val := string(*e.endpointPort.Protocol)
- return &val
-}
-
-func (e *endpointSlicePortAdaptorV1) appProtocol() *string {
- return e.endpointPort.AppProtocol
-}
-
-type endpointSlicePortAdaptorV1beta1 struct {
- endpointPort v1beta1.EndpointPort
-}
-
-func newEndpointSlicePortAdaptorFromV1beta1(port v1beta1.EndpointPort) endpointSlicePortAdaptor {
- return &endpointSlicePortAdaptorV1beta1{endpointPort: port}
-}
-
-func (e *endpointSlicePortAdaptorV1beta1) name() *string {
- return e.endpointPort.Name
-}
-
-func (e *endpointSlicePortAdaptorV1beta1) port() *int32 {
- return e.endpointPort.Port
-}
-
-func (e *endpointSlicePortAdaptorV1beta1) protocol() *string {
- val := string(*e.endpointPort.Protocol)
- return &val
-}
-
-func (e *endpointSlicePortAdaptorV1beta1) appProtocol() *string {
- return e.endpointPort.AppProtocol
-}
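
With v1beta1 EndpointSlice support dropped, the adaptor layer deleted above has no remaining purpose: callers read the discovery/v1 types directly. A minimal sketch of the direct field access that replaces the old accessor methods (illustrative function name only):

package kubernetes

import (
	"strconv"

	v1 "k8s.io/api/discovery/v1"
)

// describeSlice shows the direct v1 field access that replaces the removed
// adaptor methods: namespace()/name()/addressType() become plain struct
// fields, and optional port fields remain pointers that need nil checks.
func describeSlice(eps v1.EndpointSlice) []string {
	out := []string{eps.Namespace, eps.Name, string(eps.AddressType)}
	for _, port := range eps.Ports {
		if port.Port != nil {
			out = append(out, strconv.FormatInt(int64(*port.Port), 10))
		}
	}
	return out
}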
diff --git a/discovery/kubernetes/endpointslice_adaptor_test.go b/discovery/kubernetes/endpointslice_adaptor_test.go
deleted file mode 100644
index 1ee3337193..0000000000
--- a/discovery/kubernetes/endpointslice_adaptor_test.go
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2020 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kubernetes
-
-import (
- "testing"
-
- "github.com/stretchr/testify/require"
- v1 "k8s.io/api/discovery/v1"
- "k8s.io/api/discovery/v1beta1"
-)
-
-func Test_EndpointSliceAdaptor_v1(t *testing.T) {
- endpointSlice := makeEndpointSliceV1()
- adaptor := newEndpointSliceAdaptorFromV1(endpointSlice)
-
- require.Equal(t, endpointSlice.ObjectMeta.Name, adaptor.name())
- require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
- require.Equal(t, endpointSlice.AddressType, v1.AddressType(adaptor.addressType()))
- require.Equal(t, endpointSlice.Labels, adaptor.labels())
- require.Equal(t, "testendpoints", endpointSlice.Labels[v1.LabelServiceName])
-
- for i, endpointAdaptor := range adaptor.endpoints() {
- require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
- require.Equal(t, endpointSlice.Endpoints[i].Hostname, endpointAdaptor.hostname())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Ready, endpointAdaptor.conditions().ready())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Serving, endpointAdaptor.conditions().serving())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Terminating, endpointAdaptor.conditions().terminating())
- require.Equal(t, endpointSlice.Endpoints[i].TargetRef, endpointAdaptor.targetRef())
- require.Equal(t, endpointSlice.Endpoints[i].DeprecatedTopology, endpointAdaptor.topology())
- }
-
- for i, portAdaptor := range adaptor.ports() {
- require.Equal(t, endpointSlice.Ports[i].Name, portAdaptor.name())
- require.Equal(t, endpointSlice.Ports[i].Port, portAdaptor.port())
- require.EqualValues(t, endpointSlice.Ports[i].Protocol, portAdaptor.protocol())
- require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol())
- }
-}
-
-func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) {
- endpointSlice := makeEndpointSliceV1beta1()
- adaptor := newEndpointSliceAdaptorFromV1beta1(endpointSlice)
-
- require.Equal(t, endpointSlice.ObjectMeta.Name, adaptor.name())
- require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
- require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType()))
- require.Equal(t, endpointSlice.Labels, adaptor.labels())
- require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName])
-
- for i, endpointAdaptor := range adaptor.endpoints() {
- require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
- require.Equal(t, endpointSlice.Endpoints[i].Hostname, endpointAdaptor.hostname())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Ready, endpointAdaptor.conditions().ready())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Serving, endpointAdaptor.conditions().serving())
- require.Equal(t, endpointSlice.Endpoints[i].Conditions.Terminating, endpointAdaptor.conditions().terminating())
- require.Equal(t, endpointSlice.Endpoints[i].TargetRef, endpointAdaptor.targetRef())
- require.Equal(t, endpointSlice.Endpoints[i].Topology, endpointAdaptor.topology())
- }
-
- for i, portAdaptor := range adaptor.ports() {
- require.Equal(t, endpointSlice.Ports[i].Name, portAdaptor.name())
- require.Equal(t, endpointSlice.Ports[i].Port, portAdaptor.port())
- require.EqualValues(t, endpointSlice.Ports[i].Protocol, portAdaptor.protocol())
- require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol())
- }
-}
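
Alongside the adaptor removal, every discoverer touched by this patch moves from go-kit log to log/slog. A minimal sketch of the constructor pattern, assuming only what the NewEndpointSlice and NewIngress hunks show (promslog.NewNopLogger as the nil fallback, structured key/value logging):

package kubernetes

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// exampleDiscoverer mirrors the logger handling used across the reworked
// discoverers: accept a *slog.Logger, fall back to a nop logger, and log
// with key/value pairs instead of level.Error(...).Log(...).
type exampleDiscoverer struct {
	logger *slog.Logger
}

func newExampleDiscoverer(l *slog.Logger) *exampleDiscoverer {
	if l == nil {
		l = promslog.NewNopLogger()
	}
	return &exampleDiscoverer{logger: l}
}

func (d *exampleDiscoverer) reportHandlerError(err error) {
	d.logger.Error("Error adding event handler.", "err", err)
}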
diff --git a/discovery/kubernetes/endpointslice_test.go b/discovery/kubernetes/endpointslice_test.go
index 6ef83081be..9eea9abd7b 100644
--- a/discovery/kubernetes/endpointslice_test.go
+++ b/discovery/kubernetes/endpointslice_test.go
@@ -21,7 +21,6 @@ import (
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
- "k8s.io/api/discovery/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
@@ -114,62 +113,9 @@ func makeEndpointSliceV1() *v1.EndpointSlice {
}
}
-func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice {
- return &v1beta1.EndpointSlice{
- ObjectMeta: metav1.ObjectMeta{
- Name: "testendpoints",
- Namespace: "default",
- Labels: map[string]string{
- v1beta1.LabelServiceName: "testendpoints",
- },
- Annotations: map[string]string{
- "test.annotation": "test",
- },
- },
- AddressType: v1beta1.AddressTypeIPv4,
- Ports: []v1beta1.EndpointPort{
- {
- Name: strptr("testport"),
- Port: int32ptr(9000),
- Protocol: protocolptr(corev1.ProtocolTCP),
- },
- },
- Endpoints: []v1beta1.Endpoint{
- {
- Addresses: []string{"1.2.3.4"},
- Hostname: strptr("testendpoint1"),
- }, {
- Addresses: []string{"2.3.4.5"},
- Conditions: v1beta1.EndpointConditions{
- Ready: boolptr(true),
- Serving: boolptr(true),
- Terminating: boolptr(false),
- },
- }, {
- Addresses: []string{"3.4.5.6"},
- Conditions: v1beta1.EndpointConditions{
- Ready: boolptr(false),
- Serving: boolptr(true),
- Terminating: boolptr(true),
- },
- }, {
- Addresses: []string{"4.5.6.7"},
- Conditions: v1beta1.EndpointConditions{
- Ready: boolptr(true),
- Serving: boolptr(true),
- Terminating: boolptr(false),
- },
- TargetRef: &corev1.ObjectReference{
- Kind: "Node",
- Name: "barbaz",
- },
- },
- },
- }
-}
-
func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.25.0")
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{
discovery: n,
@@ -249,72 +195,8 @@ func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) {
}.Run(t)
}
-func TestEndpointSliceDiscoveryBeforeRunV1beta1(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "1.20.0")
-
- k8sDiscoveryTest{
- discovery: n,
- beforeRun: func() {
- obj := makeEndpointSliceV1beta1()
- c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
- },
- expectedMaxItems: 1,
- expectedRes: map[string]*targetgroup.Group{
- "endpointslice/default/testendpoints": {
- Targets: []model.LabelSet{
- {
- "__address__": "1.2.3.4:9000",
- "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "2.3.4.5:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "3.4.5.6:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "4.5.6.7:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "Node",
- "__meta_kubernetes_endpointslice_address_target_name": "barbaz",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- },
- Labels: model.LabelSet{
- "__meta_kubernetes_endpointslice_address_type": "IPv4",
- "__meta_kubernetes_namespace": "default",
- "__meta_kubernetes_endpointslice_name": "testendpoints",
- "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
- "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
- "__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
- "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
- },
- Source: "endpointslice/default/testendpoints",
- },
- },
- }.Run(t)
-}
-
func TestEndpointSliceDiscoveryAdd(t *testing.T) {
+ t.Parallel()
obj := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
@@ -353,25 +235,25 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
PodIP: "1.2.3.4",
},
}
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.20.0", obj)
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, obj)
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
- obj := &v1beta1.EndpointSlice{
+ obj := &v1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
},
- AddressType: v1beta1.AddressTypeIPv4,
- Ports: []v1beta1.EndpointPort{
+ AddressType: v1.AddressTypeIPv4,
+ Ports: []v1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
- Endpoints: []v1beta1.Endpoint{
+ Endpoints: []v1.Endpoint{
{
Addresses: []string{"4.3.2.1"},
TargetRef: &corev1.ObjectReference{
@@ -379,13 +261,13 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
Name: "testpod",
Namespace: "default",
},
- Conditions: v1beta1.EndpointConditions{
+ Conditions: v1.EndpointConditions{
Ready: boolptr(false),
},
},
},
}
- c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
+ c.DiscoveryV1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{
@@ -411,6 +293,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4:9001",
@@ -426,6 +309,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@@ -440,118 +324,36 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
}
func TestEndpointSliceDiscoveryDelete(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeEndpointSliceV1()
- c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{})
+ c.DiscoveryV1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
Source: "endpointslice/default/testendpoints",
- Targets: []model.LabelSet{
- {
- "__address__": "1.2.3.4:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "",
- "__meta_kubernetes_endpointslice_address_target_name": "",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
- "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar",
- "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true",
- "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "2.3.4.5:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "3.4.5.6:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "4.5.6.7:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "Node",
- "__meta_kubernetes_endpointslice_address_target_name": "barbaz",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- },
- Labels: map[model.LabelName]model.LabelValue{
- "__meta_kubernetes_endpointslice_address_type": "IPv4",
- "__meta_kubernetes_endpointslice_name": "testendpoints",
- "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
- "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
- "__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
- "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
- "__meta_kubernetes_namespace": "default",
- },
},
},
}.Run(t)
}
func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
- obj := &v1beta1.EndpointSlice{
- ObjectMeta: metav1.ObjectMeta{
- Name: "testendpoints",
- Namespace: "default",
- },
- AddressType: v1beta1.AddressTypeIPv4,
- Ports: []v1beta1.EndpointPort{
- {
- Name: strptr("testport"),
- Port: int32ptr(9000),
- Protocol: protocolptr(corev1.ProtocolTCP),
- },
- },
- Endpoints: []v1beta1.Endpoint{
- {
- Addresses: []string{"1.2.3.4"},
- Hostname: strptr("testendpoint1"),
- }, {
- Addresses: []string{"2.3.4.5"},
- Conditions: v1beta1.EndpointConditions{
- Ready: boolptr(true),
- },
- },
- },
- }
- c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
+ obj := makeEndpointSliceV1()
+ obj.ObjectMeta.Labels = nil
+ obj.ObjectMeta.Annotations = nil
+ obj.Endpoints = obj.Endpoints[0:2]
+ c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
@@ -586,39 +388,11 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
- {
- "__address__": "3.4.5.6:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "4.5.6.7:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "Node",
- "__meta_kubernetes_endpointslice_address_target_name": "barbaz",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
},
Labels: model.LabelSet{
- "__meta_kubernetes_endpointslice_address_type": "IPv4",
- "__meta_kubernetes_endpointslice_name": "testendpoints",
- "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
- "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
- "__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
- "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
- "__meta_kubernetes_namespace": "default",
+ "__meta_kubernetes_endpointslice_address_type": "IPv4",
+ "__meta_kubernetes_endpointslice_name": "testendpoints",
+ "__meta_kubernetes_namespace": "default",
},
},
},
@@ -626,85 +400,19 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
}
func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
- obj := &v1beta1.EndpointSlice{
- ObjectMeta: metav1.ObjectMeta{
- Name: "testendpoints",
- Namespace: "default",
- },
- AddressType: v1beta1.AddressTypeIPv4,
- Ports: []v1beta1.EndpointPort{
- {
- Name: strptr("testport"),
- Port: int32ptr(9000),
- Protocol: protocolptr(corev1.ProtocolTCP),
- },
- },
- Endpoints: []v1beta1.Endpoint{},
- }
- c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
+ obj := makeEndpointSliceV1()
+ obj.Endpoints = []v1.Endpoint{}
+ c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
- Targets: []model.LabelSet{
- {
- "__address__": "1.2.3.4:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "",
- "__meta_kubernetes_endpointslice_address_target_name": "",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
- "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar",
- "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true",
- "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "2.3.4.5:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "3.4.5.6:9000",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- {
- "__address__": "4.5.6.7:9000",
- "__meta_kubernetes_endpointslice_address_target_kind": "Node",
- "__meta_kubernetes_endpointslice_address_target_name": "barbaz",
- "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
- "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
- "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
- "__meta_kubernetes_endpointslice_port": "9000",
- "__meta_kubernetes_endpointslice_port_app_protocol": "http",
- "__meta_kubernetes_endpointslice_port_name": "testport",
- "__meta_kubernetes_endpointslice_port_protocol": "TCP",
- },
- },
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
@@ -721,7 +429,8 @@ func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) {
}
func TestEndpointSliceDiscoveryWithService(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
@@ -813,7 +522,8 @@ func TestEndpointSliceDiscoveryWithService(t *testing.T) {
}
func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
+ t.Parallel()
+ n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
@@ -920,6 +630,7 @@ func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) {
}
func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
+ t.Parallel()
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
@@ -1019,6 +730,7 @@ func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
}
func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
+ t.Parallel()
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
@@ -1124,6 +836,7 @@ func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
}
func TestEndpointSliceDiscoveryNamespaces(t *testing.T) {
+ t.Parallel()
epOne := makeEndpointSliceV1()
epOne.Namespace = "ns1"
objs := []runtime.Object{
@@ -1285,6 +998,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@@ -1299,6 +1013,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) {
}
func TestEndpointSliceDiscoveryOwnNamespace(t *testing.T) {
+ t.Parallel()
epOne := makeEndpointSliceV1()
epOne.Namespace = "own-ns"
@@ -1419,6 +1134,7 @@ func TestEndpointSliceDiscoveryOwnNamespace(t *testing.T) {
}
func TestEndpointSliceDiscoveryEmptyPodStatus(t *testing.T) {
+ t.Parallel()
ep := makeEndpointSliceV1()
ep.Namespace = "ns"
@@ -1465,6 +1181,7 @@ func TestEndpointSliceDiscoveryEmptyPodStatus(t *testing.T) {
// sets up indexing for the main Kube informer only when needed.
// See: https://github.com/prometheus/prometheus/pull/13554#discussion_r1490965817
func TestEndpointSliceInfIndexersCount(t *testing.T) {
+ t.Parallel()
tests := []struct {
name string
withNodeMetadata bool
@@ -1475,12 +1192,14 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
var (
- n *Discovery
- mainInfIndexersCount int
+ n *Discovery
+ // service indexer is enabled by default
+ mainInfIndexersCount = 1
)
if tc.withNodeMetadata {
- mainInfIndexersCount = 1
+ mainInfIndexersCount++
n, _ = makeDiscoveryWithMetadata(RoleEndpointSlice, NamespaceDiscovery{}, AttachMetadataConfig{Node: true})
} else {
n, _ = makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{})
@@ -1498,3 +1217,166 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) {
})
}
}
+
+func TestEndpointSliceDiscoverySidecarContainer(t *testing.T) {
+ t.Parallel()
+ objs := []runtime.Object{
+ &v1.EndpointSlice{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "testsidecar",
+ Namespace: "default",
+ },
+ AddressType: v1.AddressTypeIPv4,
+ Ports: []v1.EndpointPort{
+ {
+ Name: strptr("testport"),
+ Port: int32ptr(9000),
+ Protocol: protocolptr(corev1.ProtocolTCP),
+ },
+ {
+ Name: strptr("initport"),
+ Port: int32ptr(9111),
+ Protocol: protocolptr(corev1.ProtocolTCP),
+ },
+ },
+ Endpoints: []v1.Endpoint{
+ {
+ Addresses: []string{"4.3.2.1"},
+ TargetRef: &corev1.ObjectReference{
+ Kind: "Pod",
+ Name: "testpod",
+ Namespace: "default",
+ },
+ },
+ },
+ },
+ &corev1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "testpod",
+ Namespace: "default",
+ UID: types.UID("deadbeef"),
+ },
+ Spec: corev1.PodSpec{
+ NodeName: "testnode",
+ InitContainers: []corev1.Container{
+ {
+ Name: "ic1",
+ Image: "ic1:latest",
+ Ports: []corev1.ContainerPort{
+ {
+ Name: "initport",
+ ContainerPort: 1111,
+ Protocol: corev1.ProtocolTCP,
+ },
+ },
+ },
+ {
+ Name: "ic2",
+ Image: "ic2:latest",
+ Ports: []corev1.ContainerPort{
+ {
+ Name: "initport",
+ ContainerPort: 9111,
+ Protocol: corev1.ProtocolTCP,
+ },
+ },
+ },
+ },
+ Containers: []corev1.Container{
+ {
+ Name: "c1",
+ Image: "c1:latest",
+ Ports: []corev1.ContainerPort{
+ {
+ Name: "mainport",
+ ContainerPort: 9000,
+ Protocol: corev1.ProtocolTCP,
+ },
+ },
+ },
+ },
+ },
+ Status: corev1.PodStatus{
+ HostIP: "2.3.4.5",
+ PodIP: "4.3.2.1",
+ },
+ },
+ }
+
+ n, _ := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{}, objs...)
+
+ k8sDiscoveryTest{
+ discovery: n,
+ expectedMaxItems: 1,
+ expectedRes: map[string]*targetgroup.Group{
+ "endpointslice/default/testsidecar": {
+ Targets: []model.LabelSet{
+ {
+ "__address__": "4.3.2.1:9000",
+ "__meta_kubernetes_endpointslice_address_target_kind": "Pod",
+ "__meta_kubernetes_endpointslice_address_target_name": "testpod",
+ "__meta_kubernetes_endpointslice_port": "9000",
+ "__meta_kubernetes_endpointslice_port_name": "testport",
+ "__meta_kubernetes_endpointslice_port_protocol": "TCP",
+ "__meta_kubernetes_pod_container_image": "c1:latest",
+ "__meta_kubernetes_pod_container_name": "c1",
+ "__meta_kubernetes_pod_container_port_name": "mainport",
+ "__meta_kubernetes_pod_container_port_number": "9000",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "false",
+ },
+ {
+ "__address__": "4.3.2.1:9111",
+ "__meta_kubernetes_endpointslice_address_target_kind": "Pod",
+ "__meta_kubernetes_endpointslice_address_target_name": "testpod",
+ "__meta_kubernetes_endpointslice_port": "9111",
+ "__meta_kubernetes_endpointslice_port_name": "initport",
+ "__meta_kubernetes_endpointslice_port_protocol": "TCP",
+ "__meta_kubernetes_pod_container_image": "ic2:latest",
+ "__meta_kubernetes_pod_container_name": "ic2",
+ "__meta_kubernetes_pod_container_port_name": "initport",
+ "__meta_kubernetes_pod_container_port_number": "9111",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "true",
+ },
+ {
+ "__address__": "4.3.2.1:1111",
+ "__meta_kubernetes_pod_container_image": "ic1:latest",
+ "__meta_kubernetes_pod_container_name": "ic1",
+ "__meta_kubernetes_pod_container_port_name": "initport",
+ "__meta_kubernetes_pod_container_port_number": "1111",
+ "__meta_kubernetes_pod_container_port_protocol": "TCP",
+ "__meta_kubernetes_pod_host_ip": "2.3.4.5",
+ "__meta_kubernetes_pod_ip": "4.3.2.1",
+ "__meta_kubernetes_pod_name": "testpod",
+ "__meta_kubernetes_pod_node_name": "testnode",
+ "__meta_kubernetes_pod_phase": "",
+ "__meta_kubernetes_pod_ready": "unknown",
+ "__meta_kubernetes_pod_uid": "deadbeef",
+ "__meta_kubernetes_pod_container_init": "true",
+ },
+ },
+ Labels: model.LabelSet{
+ "__meta_kubernetes_endpointslice_address_type": "IPv4",
+ "__meta_kubernetes_endpointslice_name": "testsidecar",
+ "__meta_kubernetes_namespace": "default",
+ },
+ Source: "endpointslice/default/testsidecar",
+ },
+ },
+ }.Run(t)
+}
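
The sidecar test above pins down the new __meta_kubernetes_pod_container_init label for EndpointSlice targets. A minimal sketch of the index-based derivation used in the builders, where any container at or past len(pod.Spec.Containers) in the combined list is treated as an init (or restartable sidecar) container:

package kubernetes

import (
	"strconv"

	apiv1 "k8s.io/api/core/v1"
)

// initLabelValues walks regular containers first, then init containers, and
// derives the init-flag label value per container name the same way the
// builders do: the position in the combined slice decides the flag.
func initLabelValues(pod *apiv1.Pod) map[string]string {
	combined := append(append([]apiv1.Container{}, pod.Spec.Containers...), pod.Spec.InitContainers...)
	values := make(map[string]string, len(combined))
	for i, c := range combined {
		values[c.Name] = strconv.FormatBool(i >= len(pod.Spec.Containers))
	}
	return values
}

The copy into a fresh slice here only keeps the sketch self-contained; the essential part is the index comparison that yields the "true"/"false" value asserted in the test.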
diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go
index 7b6366b257..0de574471f 100644
--- a/discovery/kubernetes/ingress.go
+++ b/discovery/kubernetes/ingress.go
@@ -17,14 +17,12 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"strings"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
- "k8s.io/api/networking/v1beta1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@@ -33,14 +31,14 @@ import (
// Ingress implements discovery of Kubernetes ingress.
type Ingress struct {
- logger log.Logger
+ logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewIngress returns a new ingress discovery.
-func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
+func NewIngress(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd)
ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate)
ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete)
@@ -67,7 +65,7 @@ func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding ingresses event handler.", "err", err)
+ l.Error("Error adding ingresses event handler.", "err", err)
}
return s
}
@@ -87,7 +85,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(i.logger).Log("msg", "ingress informer unable to sync cache")
+ i.logger.Error("ingress informer unable to sync cache")
}
return
}
@@ -123,23 +121,18 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b
return true
}
- var ia ingressAdaptor
- switch ingress := o.(type) {
- case *v1.Ingress:
- ia = newIngressAdaptorFromV1(ingress)
- case *v1beta1.Ingress:
- ia = newIngressAdaptorFromV1beta1(ingress)
- default:
- level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err",
+ if ingress, ok := o.(*v1.Ingress); ok {
+ send(ctx, ch, i.buildIngress(*ingress))
+ } else {
+ i.logger.Error("converting to Ingress object failed", "err",
fmt.Errorf("received unexpected object: %v", o))
return true
}
- send(ctx, ch, i.buildIngress(ia))
return true
}
-func ingressSource(s ingressAdaptor) string {
- return ingressSourceFromNamespaceAndName(s.namespace(), s.name())
+func ingressSource(s v1.Ingress) string {
+ return ingressSourceFromNamespaceAndName(s.Namespace, s.Name)
}
func ingressSourceFromNamespaceAndName(namespace, name string) string {
@@ -153,15 +146,15 @@ const (
ingressClassNameLabel = metaLabelPrefix + "ingress_class_name"
)
-func ingressLabels(ingress ingressAdaptor) model.LabelSet {
+func ingressLabels(ingress v1.Ingress) model.LabelSet {
// Each label and annotation will create two key-value pairs in the map.
ls := make(model.LabelSet)
- ls[namespaceLabel] = lv(ingress.namespace())
- if cls := ingress.ingressClassName(); cls != nil {
+ ls[namespaceLabel] = lv(ingress.Namespace)
+ if cls := ingress.Spec.IngressClassName; cls != nil {
ls[ingressClassNameLabel] = lv(*cls)
}
- addObjectMetaLabels(ls, ingress.getObjectMeta(), RoleIngress)
+ addObjectMetaLabels(ls, ingress.ObjectMeta, RoleIngress)
return ls
}
@@ -181,19 +174,39 @@ func pathsFromIngressPaths(ingressPaths []string) []string {
return paths
}
-func (i *Ingress) buildIngress(ingress ingressAdaptor) *targetgroup.Group {
+func rulePaths(rule v1.IngressRule) []string {
+ rv := rule.IngressRuleValue
+ if rv.HTTP == nil {
+ return nil
+ }
+ paths := make([]string, len(rv.HTTP.Paths))
+ for n, p := range rv.HTTP.Paths {
+ paths[n] = p.Path
+ }
+ return paths
+}
+
+func tlsHosts(ingressTLS []v1.IngressTLS) []string {
+ var hosts []string
+ for _, tls := range ingressTLS {
+ hosts = append(hosts, tls.Hosts...)
+ }
+ return hosts
+}
+
+func (i *Ingress) buildIngress(ingress v1.Ingress) *targetgroup.Group {
tg := &targetgroup.Group{
Source: ingressSource(ingress),
}
tg.Labels = ingressLabels(ingress)
- for _, rule := range ingress.rules() {
+ for _, rule := range ingress.Spec.Rules {
scheme := "http"
- paths := pathsFromIngressPaths(rule.paths())
+ paths := pathsFromIngressPaths(rulePaths(rule))
out:
- for _, pattern := range ingress.tlsHosts() {
- if matchesHostnamePattern(pattern, rule.host()) {
+ for _, pattern := range tlsHosts(ingress.Spec.TLS) {
+ if matchesHostnamePattern(pattern, rule.Host) {
scheme = "https"
break out
}
@@ -201,9 +214,9 @@ func (i *Ingress) buildIngress(ingress ingressAdaptor) *targetgroup.Group {
for _, path := range paths {
tg.Targets = append(tg.Targets, model.LabelSet{
- model.AddressLabel: lv(rule.host()),
+ model.AddressLabel: lv(rule.Host),
ingressSchemeLabel: lv(scheme),
- ingressHostLabel: lv(rule.host()),
+ ingressHostLabel: lv(rule.Host),
ingressPathLabel: lv(path),
})
}
diff --git a/discovery/kubernetes/ingress_adaptor.go b/discovery/kubernetes/ingress_adaptor.go
deleted file mode 100644
index d1a7b7f2a2..0000000000
--- a/discovery/kubernetes/ingress_adaptor.go
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright 2016 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kubernetes
-
-import (
- v1 "k8s.io/api/networking/v1"
- "k8s.io/api/networking/v1beta1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-// ingressAdaptor is an adaptor for the different Ingress versions.
-type ingressAdaptor interface {
- getObjectMeta() metav1.ObjectMeta
- name() string
- namespace() string
- labels() map[string]string
- annotations() map[string]string
- tlsHosts() []string
- ingressClassName() *string
- rules() []ingressRuleAdaptor
-}
-
-type ingressRuleAdaptor interface {
- paths() []string
- host() string
-}
-
-// Adaptor for networking.k8s.io/v1.
-type ingressAdaptorV1 struct {
- ingress *v1.Ingress
-}
-
-func newIngressAdaptorFromV1(ingress *v1.Ingress) ingressAdaptor {
- return &ingressAdaptorV1{ingress: ingress}
-}
-
-func (i *ingressAdaptorV1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta }
-func (i *ingressAdaptorV1) name() string { return i.ingress.Name }
-func (i *ingressAdaptorV1) namespace() string { return i.ingress.Namespace }
-func (i *ingressAdaptorV1) labels() map[string]string { return i.ingress.Labels }
-func (i *ingressAdaptorV1) annotations() map[string]string { return i.ingress.Annotations }
-func (i *ingressAdaptorV1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
-
-func (i *ingressAdaptorV1) tlsHosts() []string {
- var hosts []string
- for _, tls := range i.ingress.Spec.TLS {
- hosts = append(hosts, tls.Hosts...)
- }
- return hosts
-}
-
-func (i *ingressAdaptorV1) rules() []ingressRuleAdaptor {
- var rules []ingressRuleAdaptor
- for _, rule := range i.ingress.Spec.Rules {
- rules = append(rules, newIngressRuleAdaptorFromV1(rule))
- }
- return rules
-}
-
-type ingressRuleAdaptorV1 struct {
- rule v1.IngressRule
-}
-
-func newIngressRuleAdaptorFromV1(rule v1.IngressRule) ingressRuleAdaptor {
- return &ingressRuleAdaptorV1{rule: rule}
-}
-
-func (i *ingressRuleAdaptorV1) paths() []string {
- rv := i.rule.IngressRuleValue
- if rv.HTTP == nil {
- return nil
- }
- paths := make([]string, len(rv.HTTP.Paths))
- for n, p := range rv.HTTP.Paths {
- paths[n] = p.Path
- }
- return paths
-}
-
-func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host }
-
-// Adaptor for networking.k8s.io/v1beta1.
-type ingressAdaptorV1Beta1 struct {
- ingress *v1beta1.Ingress
-}
-
-func newIngressAdaptorFromV1beta1(ingress *v1beta1.Ingress) ingressAdaptor {
- return &ingressAdaptorV1Beta1{ingress: ingress}
-}
-func (i *ingressAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta }
-func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name }
-func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace }
-func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels }
-func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations }
-func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
-
-func (i *ingressAdaptorV1Beta1) tlsHosts() []string {
- var hosts []string
- for _, tls := range i.ingress.Spec.TLS {
- hosts = append(hosts, tls.Hosts...)
- }
- return hosts
-}
-
-func (i *ingressAdaptorV1Beta1) rules() []ingressRuleAdaptor {
- var rules []ingressRuleAdaptor
- for _, rule := range i.ingress.Spec.Rules {
- rules = append(rules, newIngressRuleAdaptorFromV1Beta1(rule))
- }
- return rules
-}
-
-type ingressRuleAdaptorV1Beta1 struct {
- rule v1beta1.IngressRule
-}
-
-func newIngressRuleAdaptorFromV1Beta1(rule v1beta1.IngressRule) ingressRuleAdaptor {
- return &ingressRuleAdaptorV1Beta1{rule: rule}
-}
-
-func (i *ingressRuleAdaptorV1Beta1) paths() []string {
- rv := i.rule.IngressRuleValue
- if rv.HTTP == nil {
- return nil
- }
- paths := make([]string, len(rv.HTTP.Paths))
- for n, p := range rv.HTTP.Paths {
- paths[n] = p.Path
- }
- return paths
-}
-
-func (i *ingressRuleAdaptorV1Beta1) host() string { return i.rule.Host }
diff --git a/discovery/kubernetes/ingress_test.go b/discovery/kubernetes/ingress_test.go
index 8e6654c2cc..a828dee27f 100644
--- a/discovery/kubernetes/ingress_test.go
+++ b/discovery/kubernetes/ingress_test.go
@@ -20,7 +20,6 @@ import (
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
- "k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -89,60 +88,6 @@ func makeIngress(tls TLSMode) *v1.Ingress {
return ret
}
-func makeIngressV1beta1(tls TLSMode) *v1beta1.Ingress {
- ret := &v1beta1.Ingress{
- ObjectMeta: metav1.ObjectMeta{
- Name: "testingress",
- Namespace: "default",
- Labels: map[string]string{"test/label": "testvalue"},
- Annotations: map[string]string{"test/annotation": "testannotationvalue"},
- },
- Spec: v1beta1.IngressSpec{
- IngressClassName: classString("testclass"),
- TLS: nil,
- Rules: []v1beta1.IngressRule{
- {
- Host: "example.com",
- IngressRuleValue: v1beta1.IngressRuleValue{
- HTTP: &v1beta1.HTTPIngressRuleValue{
- Paths: []v1beta1.HTTPIngressPath{
- {Path: "/"},
- {Path: "/foo"},
- },
- },
- },
- },
- {
- // No backend config, ignored
- Host: "nobackend.example.com",
- IngressRuleValue: v1beta1.IngressRuleValue{
- HTTP: &v1beta1.HTTPIngressRuleValue{},
- },
- },
- {
- Host: "test.example.com",
- IngressRuleValue: v1beta1.IngressRuleValue{
- HTTP: &v1beta1.HTTPIngressRuleValue{
- Paths: []v1beta1.HTTPIngressPath{{}},
- },
- },
- },
- },
- },
- }
-
- switch tls {
- case TLSYes:
- ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com", "test.example.com"}}}
- case TLSMixed:
- ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com"}}}
- case TLSWildcard:
- ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"*.example.com"}}}
- }
-
- return ret
-}
-
func classString(v string) *string {
return &v
}
@@ -199,6 +144,7 @@ func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group
}
func TestIngressDiscoveryAdd(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{
@@ -212,21 +158,8 @@ func TestIngressDiscoveryAdd(t *testing.T) {
}.Run(t)
}
-func TestIngressDiscoveryAddV1beta1(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
-
- k8sDiscoveryTest{
- discovery: n,
- afterStart: func() {
- obj := makeIngressV1beta1(TLSNo)
- c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
- },
- expectedMaxItems: 1,
- expectedRes: expectedTargetGroups("default", TLSNo),
- }.Run(t)
-}
-
func TestIngressDiscoveryAddTLS(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{
@@ -240,21 +173,8 @@ func TestIngressDiscoveryAddTLS(t *testing.T) {
}.Run(t)
}
-func TestIngressDiscoveryAddTLSV1beta1(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
-
- k8sDiscoveryTest{
- discovery: n,
- afterStart: func() {
- obj := makeIngressV1beta1(TLSYes)
- c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
- },
- expectedMaxItems: 1,
- expectedRes: expectedTargetGroups("default", TLSYes),
- }.Run(t)
-}
-
func TestIngressDiscoveryAddMixed(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{
@@ -268,21 +188,8 @@ func TestIngressDiscoveryAddMixed(t *testing.T) {
}.Run(t)
}
-func TestIngressDiscoveryAddMixedV1beta1(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
-
- k8sDiscoveryTest{
- discovery: n,
- afterStart: func() {
- obj := makeIngressV1beta1(TLSMixed)
- c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
- },
- expectedMaxItems: 1,
- expectedRes: expectedTargetGroups("default", TLSMixed),
- }.Run(t)
-}
-
func TestIngressDiscoveryNamespaces(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
expected := expectedTargetGroups("ns1", TLSNo)
@@ -303,28 +210,8 @@ func TestIngressDiscoveryNamespaces(t *testing.T) {
}.Run(t)
}
-func TestIngressDiscoveryNamespacesV1beta1(t *testing.T) {
- n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, "v1.18.0")
-
- expected := expectedTargetGroups("ns1", TLSNo)
- for k, v := range expectedTargetGroups("ns2", TLSNo) {
- expected[k] = v
- }
- k8sDiscoveryTest{
- discovery: n,
- afterStart: func() {
- for _, ns := range []string{"ns1", "ns2"} {
- obj := makeIngressV1beta1(TLSNo)
- obj.Namespace = ns
- c.NetworkingV1beta1().Ingresses(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
- }
- },
- expectedMaxItems: 2,
- expectedRes: expected,
- }.Run(t)
-}
-
func TestIngressDiscoveryOwnNamespace(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{IncludeOwnNamespace: true})
expected := expectedTargetGroups("own-ns", TLSNo)
diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go
index a8b6f85899..2261fb3efe 100644
--- a/discovery/kubernetes/kubernetes.go
+++ b/discovery/kubernetes/kubernetes.go
@@ -17,42 +17,36 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"os"
"reflect"
+ "slices"
"strings"
"sync"
"time"
- "github.com/prometheus/prometheus/util/strutil"
-
- disv1beta1 "k8s.io/api/discovery/v1beta1"
-
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
apiv1 "k8s.io/api/core/v1"
disv1 "k8s.io/api/discovery/v1"
networkv1 "k8s.io/api/networking/v1"
- "k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
- utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
+ _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" // Required to get the GCP auth provider working.
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
- // Required to get the GCP auth provider working.
- _ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
-
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
+ "github.com/prometheus/prometheus/util/strutil"
)
const (
@@ -63,14 +57,10 @@ const (
presentValue = model.LabelValue("true")
)
-var (
- // Http header.
- userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
- // DefaultSDConfig is the default Kubernetes SD configuration.
- DefaultSDConfig = SDConfig{
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- }
-)
+// DefaultSDConfig is the default Kubernetes SD configuration.
+var DefaultSDConfig = SDConfig{
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+}
func init() {
discovery.RegisterConfig(&SDConfig{})
@@ -177,7 +167,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.Role == "" {
- return fmt.Errorf("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)")
+ return errors.New("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)")
}
err = c.HTTPClientConfig.Validate()
if err != nil {
@@ -185,20 +175,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
if c.APIServer.URL != nil && c.KubeConfig != "" {
// Api-server and kubeconfig_file are mutually exclusive
- return fmt.Errorf("cannot use 'kubeconfig_file' and 'api_server' simultaneously")
+ return errors.New("cannot use 'kubeconfig_file' and 'api_server' simultaneously")
}
if c.KubeConfig != "" && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) {
// Kubeconfig_file and custom http config are mutually exclusive
- return fmt.Errorf("cannot use a custom HTTP client configuration together with 'kubeconfig_file'")
+ return errors.New("cannot use a custom HTTP client configuration together with 'kubeconfig_file'")
}
if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) {
- return fmt.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly")
+ return errors.New("to use custom HTTP client configuration please provide the 'api_server' URL explicitly")
}
if c.APIServer.URL != nil && c.NamespaceDiscovery.IncludeOwnNamespace {
- return fmt.Errorf("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously")
+ return errors.New("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously")
}
if c.KubeConfig != "" && c.NamespaceDiscovery.IncludeOwnNamespace {
- return fmt.Errorf("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously")
+ return errors.New("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously")
}
foundSelectorRoles := make(map[Role]struct{})
@@ -220,18 +210,9 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if _, ok := allowedSelectors[c.Role]; !ok {
return fmt.Errorf("invalid role: %q, expecting one of: pod, service, endpoints, endpointslice, node or ingress", c.Role)
}
- var allowed bool
- for _, role := range allowedSelectors[c.Role] {
- if role == string(selector.Role) {
- allowed = true
- break
- }
- }
-
- if !allowed {
+ if !slices.Contains(allowedSelectors[c.Role], string(selector.Role)) {
return fmt.Errorf("%s role supports only %s selectors", c.Role, strings.Join(allowedSelectors[c.Role], ", "))
}
-
_, err := fields.ParseSelector(selector.Field)
if err != nil {
return err
@@ -264,7 +245,7 @@ type Discovery struct {
sync.RWMutex
client kubernetes.Interface
role Role
- logger log.Logger
+ logger *slog.Logger
namespaceDiscovery *NamespaceDiscovery
discoverers []discovery.Discoverer
selectors roleSelector
@@ -289,14 +270,14 @@ func (d *Discovery) getNamespaces() []string {
}
// New creates a new Kubernetes discovery for the given role.
-func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) {
+func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) {
m, ok := metrics.(*kubernetesMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
var (
kcfg *rest.Config
@@ -328,7 +309,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di
ownNamespace = string(ownNamespaceContents)
}
- level.Info(l).Log("msg", "Using pod service account via in-cluster config")
+ l.Info("Using pod service account via in-cluster config")
default:
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd")
if err != nil {
@@ -340,7 +321,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di
}
}
- kcfg.UserAgent = userAgent
+ kcfg.UserAgent = version.PrometheusUserAgent()
kcfg.ContentType = "application/vnd.kubernetes.protobuf"
c, err := kubernetes.NewForConfig(kcfg)
@@ -401,55 +382,22 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
switch d.role {
case RoleEndpointSlice:
- // Check "networking.k8s.io/v1" availability with retries.
- // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility
- var v1Supported bool
- if retryOnError(ctx, 10*time.Second,
- func() (err error) {
- v1Supported, err = checkDiscoveryV1Supported(d.client)
- if err != nil {
- level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err)
- }
- return err
- },
- ) {
- d.Unlock()
- return
- }
-
for _, namespace := range namespaces {
var informer cache.SharedIndexInformer
- if v1Supported {
- e := d.client.DiscoveryV1().EndpointSlices(namespace)
- elw := &cache.ListWatch{
- ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
- options.FieldSelector = d.selectors.endpointslice.field
- options.LabelSelector = d.selectors.endpointslice.label
- return e.List(ctx, options)
- },
- WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
- options.FieldSelector = d.selectors.endpointslice.field
- options.LabelSelector = d.selectors.endpointslice.label
- return e.Watch(ctx, options)
- },
- }
- informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{})
- } else {
- e := d.client.DiscoveryV1beta1().EndpointSlices(namespace)
- elw := &cache.ListWatch{
- ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
- options.FieldSelector = d.selectors.endpointslice.field
- options.LabelSelector = d.selectors.endpointslice.label
- return e.List(ctx, options)
- },
- WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
- options.FieldSelector = d.selectors.endpointslice.field
- options.LabelSelector = d.selectors.endpointslice.label
- return e.Watch(ctx, options)
- },
- }
- informer = d.newEndpointSlicesByNodeInformer(elw, &disv1beta1.EndpointSlice{})
+ e := d.client.DiscoveryV1().EndpointSlices(namespace)
+ elw := &cache.ListWatch{
+ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
+ options.FieldSelector = d.selectors.endpointslice.field
+ options.LabelSelector = d.selectors.endpointslice.label
+ return e.List(ctx, options)
+ },
+ WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
+ options.FieldSelector = d.selectors.endpointslice.field
+ options.LabelSelector = d.selectors.endpointslice.label
+ return e.Watch(ctx, options)
+ },
}
+ informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{})
s := d.client.CoreV1().Services(namespace)
slw := &cache.ListWatch{
@@ -483,7 +431,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go nodeInf.Run(ctx.Done())
}
eps := NewEndpointSlice(
- log.With(d.logger, "role", "endpointslice"),
+ d.logger.With("role", "endpointslice"),
informer,
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
@@ -543,7 +491,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
eps := NewEndpoints(
- log.With(d.logger, "role", "endpoint"),
+ d.logger.With("role", "endpoint"),
d.newEndpointsByNodeInformer(elw),
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
@@ -577,7 +525,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
},
}
pod := NewPod(
- log.With(d.logger, "role", "pod"),
+ d.logger.With("role", "pod"),
d.newPodsByNodeInformer(plw),
nodeInformer,
d.metrics.eventCount,
@@ -601,7 +549,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
},
}
svc := NewService(
- log.With(d.logger, "role", "service"),
+ d.logger.With("role", "service"),
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.metrics.eventCount,
)
@@ -609,57 +557,24 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go svc.informer.Run(ctx.Done())
}
case RoleIngress:
- // Check "networking.k8s.io/v1" availability with retries.
- // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility
- var v1Supported bool
- if retryOnError(ctx, 10*time.Second,
- func() (err error) {
- v1Supported, err = checkNetworkingV1Supported(d.client)
- if err != nil {
- level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err)
- }
- return err
- },
- ) {
- d.Unlock()
- return
- }
-
for _, namespace := range namespaces {
var informer cache.SharedInformer
- if v1Supported {
- i := d.client.NetworkingV1().Ingresses(namespace)
- ilw := &cache.ListWatch{
- ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
- options.FieldSelector = d.selectors.ingress.field
- options.LabelSelector = d.selectors.ingress.label
- return i.List(ctx, options)
- },
- WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
- options.FieldSelector = d.selectors.ingress.field
- options.LabelSelector = d.selectors.ingress.label
- return i.Watch(ctx, options)
- },
- }
- informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled)
- } else {
- i := d.client.NetworkingV1beta1().Ingresses(namespace)
- ilw := &cache.ListWatch{
- ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
- options.FieldSelector = d.selectors.ingress.field
- options.LabelSelector = d.selectors.ingress.label
- return i.List(ctx, options)
- },
- WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
- options.FieldSelector = d.selectors.ingress.field
- options.LabelSelector = d.selectors.ingress.label
- return i.Watch(ctx, options)
- },
- }
- informer = d.mustNewSharedInformer(ilw, &v1beta1.Ingress{}, resyncDisabled)
+ i := d.client.NetworkingV1().Ingresses(namespace)
+ ilw := &cache.ListWatch{
+ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
+ options.FieldSelector = d.selectors.ingress.field
+ options.LabelSelector = d.selectors.ingress.label
+ return i.List(ctx, options)
+ },
+ WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
+ options.FieldSelector = d.selectors.ingress.field
+ options.LabelSelector = d.selectors.ingress.label
+ return i.Watch(ctx, options)
+ },
}
+ informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled)
ingress := NewIngress(
- log.With(d.logger, "role", "ingress"),
+ d.logger.With("role", "ingress"),
informer,
d.metrics.eventCount,
)
@@ -668,11 +583,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
case RoleNode:
nodeInformer := d.newNodeInformer(ctx)
- node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.metrics.eventCount)
+ node := NewNode(d.logger.With("role", "node"), nodeInformer, d.metrics.eventCount)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:
- level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role)
+ d.logger.Error("unknown Kubernetes discovery kind", "role", d.role)
}
var wg sync.WaitGroup
@@ -720,20 +635,6 @@ func retryOnError(ctx context.Context, interval time.Duration, f func() error) (
}
}
-func checkNetworkingV1Supported(client kubernetes.Interface) (bool, error) {
- k8sVer, err := client.Discovery().ServerVersion()
- if err != nil {
- return false, err
- }
- semVer, err := utilversion.ParseSemantic(k8sVer.String())
- if err != nil {
- return false, err
- }
- // networking.k8s.io/v1 is available since Kubernetes v1.19
- // https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md
- return semVer.Major() >= 1 && semVer.Minor() >= 19, nil
-}
-
func (d *Discovery) newNodeInformer(ctx context.Context) cache.SharedInformer {
nlw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
@@ -756,7 +657,7 @@ func (d *Discovery) newPodsByNodeInformer(plw *cache.ListWatch) cache.SharedInde
indexers[nodeIndex] = func(obj interface{}) ([]string, error) {
pod, ok := obj.(*apiv1.Pod)
if !ok {
- return nil, fmt.Errorf("object is not a pod")
+ return nil, errors.New("object is not a pod")
}
return []string{pod.Spec.NodeName}, nil
}
@@ -770,7 +671,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share
indexers[podIndex] = func(obj interface{}) ([]string, error) {
e, ok := obj.(*apiv1.Endpoints)
if !ok {
- return nil, fmt.Errorf("object is not endpoints")
+ return nil, errors.New("object is not endpoints")
}
var pods []string
for _, target := range e.Subsets {
@@ -789,7 +690,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share
indexers[nodeIndex] = func(obj interface{}) ([]string, error) {
e, ok := obj.(*apiv1.Endpoints)
if !ok {
- return nil, fmt.Errorf("object is not endpoints")
+ return nil, errors.New("object is not endpoints")
}
var nodes []string
for _, target := range e.Subsets {
@@ -814,41 +715,41 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share
func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object runtime.Object) cache.SharedIndexInformer {
indexers := make(map[string]cache.IndexFunc)
+ indexers[serviceIndex] = func(obj interface{}) ([]string, error) {
+ e, ok := obj.(*disv1.EndpointSlice)
+ if !ok {
+ return nil, errors.New("object is not an endpointslice")
+ }
+
+ svcName, exists := e.Labels[disv1.LabelServiceName]
+ if !exists {
+ return nil, nil
+ }
+
+ return []string{namespacedName(e.Namespace, svcName)}, nil
+ }
if !d.attachMetadata.Node {
return d.mustNewSharedIndexInformer(plw, object, resyncDisabled, indexers)
}
indexers[nodeIndex] = func(obj interface{}) ([]string, error) {
+ e, ok := obj.(*disv1.EndpointSlice)
+ if !ok {
+ return nil, errors.New("object is not an endpointslice")
+ }
+
var nodes []string
- switch e := obj.(type) {
- case *disv1.EndpointSlice:
- for _, target := range e.Endpoints {
- if target.TargetRef != nil {
- switch target.TargetRef.Kind {
- case "Pod":
- if target.NodeName != nil {
- nodes = append(nodes, *target.NodeName)
- }
- case "Node":
- nodes = append(nodes, target.TargetRef.Name)
+ for _, target := range e.Endpoints {
+ if target.TargetRef != nil {
+ switch target.TargetRef.Kind {
+ case "Pod":
+ if target.NodeName != nil {
+ nodes = append(nodes, *target.NodeName)
}
+ case "Node":
+ nodes = append(nodes, target.TargetRef.Name)
}
}
- case *disv1beta1.EndpointSlice:
- for _, target := range e.Endpoints {
- if target.TargetRef != nil {
- switch target.TargetRef.Kind {
- case "Pod":
- if target.NodeName != nil {
- nodes = append(nodes, *target.NodeName)
- }
- case "Node":
- nodes = append(nodes, target.TargetRef.Name)
- }
- }
- }
- default:
- return nil, fmt.Errorf("object is not an endpointslice")
}
return nodes, nil
@@ -882,21 +783,6 @@ func (d *Discovery) mustNewSharedIndexInformer(lw cache.ListerWatcher, exampleOb
return informer
}
-func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) {
- k8sVer, err := client.Discovery().ServerVersion()
- if err != nil {
- return false, err
- }
- semVer, err := utilversion.ParseSemantic(k8sVer.String())
- if err != nil {
- return false, err
- }
- // The discovery.k8s.io/v1beta1 API version of EndpointSlice will no longer be served in v1.25.
- // discovery.k8s.io/v1 is available since Kubernetes v1.21
- // https://kubernetes.io/docs/reference/using-api/deprecation-guide/#v1-25
- return semVer.Major() >= 1 && semVer.Minor() >= 21, nil
-}
-
func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, role Role) {
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_name")] = lv(objectMeta.Name)
@@ -916,3 +802,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta,
func namespacedName(namespace, name string) string {
return namespace + "/" + name
}
+
+// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone.
+// It assumes the informer uses MetaNamespaceKeyFunc as its key function, so the tombstone key is the node name.
+func nodeName(o interface{}) (string, error) {
+ key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o)
+ if err != nil {
+ return "", err
+ }
+ return key, nil
+}
diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go
index 552f8a4453..fb53032845 100644
--- a/discovery/kubernetes/kubernetes_test.go
+++ b/discovery/kubernetes/kubernetes_test.go
@@ -20,10 +20,13 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
+ apiv1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/version"
"k8s.io/apimachinery/pkg/watch"
@@ -33,8 +36,6 @@ import (
kubetesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
- "github.com/prometheus/client_golang/prometheus"
-
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
@@ -46,7 +47,7 @@ func TestMain(m *testing.M) {
// makeDiscovery creates a kubernetes.Discovery instance for testing.
func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) {
- return makeDiscoveryWithVersion(role, nsDiscovery, "v1.22.0", objects...)
+ return makeDiscoveryWithVersion(role, nsDiscovery, "v1.25.0", objects...)
}
// makeDiscoveryWithVersion creates a kubernetes.Discovery instance with the specified kubernetes version for testing.
@@ -71,7 +72,7 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
d := &Discovery{
client: clientset,
- logger: log.NewNopLogger(),
+ logger: promslog.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
ownNamespace: "own-ns",
@@ -154,7 +155,7 @@ func (d k8sDiscoveryTest) Run(t *testing.T) {
// readResultWithTimeout reads all targetgroups from channel with timeout.
// It merges targetgroups by source and sends the result to result channel.
-func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, max int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
+func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, maxGroups int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
res := make(map[string]*targetgroup.Group)
timeout := time.After(stopAfter)
Loop:
@@ -167,7 +168,7 @@ Loop:
}
res[tg.Source] = tg
}
- if len(res) == max {
+ if len(res) == maxGroups {
// Reached max target groups we may get, break fast.
break Loop
}
@@ -175,10 +176,10 @@ Loop:
// Because we use queue, an object that is created then
// deleted or updated may be processed only once.
// So possibly we may skip events, timed out here.
- t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), max)
+ t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), maxGroups)
break Loop
case <-ctx.Done():
- t.Logf("stopped, got %d (max: %d) items", len(res), max)
+ t.Logf("stopped, got %d (max: %d) items", len(res), maxGroups)
break Loop
}
}
@@ -197,7 +198,7 @@ func requireTargetGroups(t *testing.T, expected, res map[string]*targetgroup.Gro
panic(err)
}
- require.Equal(t, string(b1), string(b2))
+ require.JSONEq(t, string(b1), string(b2))
}
// marshalTargetGroups serializes a set of target groups to JSON, ignoring the
@@ -271,6 +272,7 @@ func (s *Service) hasSynced() bool {
}
func TestRetryOnError(t *testing.T) {
+ t.Parallel()
for _, successAt := range []int{1, 2, 3} {
var called int
f := func() error {
@@ -285,41 +287,8 @@ func TestRetryOnError(t *testing.T) {
}
}
-func TestCheckNetworkingV1Supported(t *testing.T) {
- tests := []struct {
- version string
- wantSupported bool
- wantErr bool
- }{
- {version: "v1.18.0", wantSupported: false, wantErr: false},
- {version: "v1.18.1", wantSupported: false, wantErr: false},
- // networking v1 is supported since Kubernetes v1.19
- {version: "v1.19.0", wantSupported: true, wantErr: false},
- {version: "v1.20.0-beta.2", wantSupported: true, wantErr: false},
- // error patterns
- {version: "", wantSupported: false, wantErr: true},
- {version: "<>", wantSupported: false, wantErr: true},
- }
-
- for _, tc := range tests {
- tc := tc
- t.Run(tc.version, func(t *testing.T) {
- clientset := fake.NewSimpleClientset()
- fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery)
- fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: tc.version}
- supported, err := checkNetworkingV1Supported(clientset)
-
- if tc.wantErr {
- require.Error(t, err)
- } else {
- require.NoError(t, err)
- }
- require.Equal(t, tc.wantSupported, supported)
- })
- }
-}
-
func TestFailuresCountMetric(t *testing.T) {
+ t.Parallel()
tests := []struct {
role Role
minFailedWatches int
@@ -342,7 +311,7 @@ func TestFailuresCountMetric(t *testing.T) {
require.Equal(t, float64(0), prom_testutil.ToFloat64(n.metrics.failuresCount))
// Simulate an error on watch requests.
- c.Discovery().(*fakediscovery.FakeDiscovery).PrependWatchReactor("*", func(action kubetesting.Action) (bool, watch.Interface, error) {
+ c.Discovery().(*fakediscovery.FakeDiscovery).PrependWatchReactor("*", func(_ kubetesting.Action) (bool, watch.Interface, error) {
return true, nil, apierrors.NewUnauthorized("unauthorized")
})
@@ -354,3 +323,19 @@ func TestFailuresCountMetric(t *testing.T) {
})
}
}
+
+func TestNodeName(t *testing.T) {
+ t.Parallel()
+ node := &apiv1.Node{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "foo",
+ },
+ }
+ name, err := nodeName(node)
+ require.NoError(t, err)
+ require.Equal(t, "foo", name)
+
+ name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"})
+ require.NoError(t, err)
+ require.Equal(t, "bar", name)
+}
diff --git a/discovery/kubernetes/metrics.go b/discovery/kubernetes/metrics.go
index fe419bc782..ba3cb1d32a 100644
--- a/discovery/kubernetes/metrics.go
+++ b/discovery/kubernetes/metrics.go
@@ -28,7 +28,7 @@ type kubernetesMetrics struct {
metricRegisterer discovery.MetricRegisterer
}
-func newDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
m := &kubernetesMetrics{
eventCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go
index 74d87e22c4..0e0c5745f2 100644
--- a/discovery/kubernetes/node.go
+++ b/discovery/kubernetes/node.go
@@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@@ -38,16 +38,16 @@ const (
// Node discovers Kubernetes nodes.
type Node struct {
- logger log.Logger
+ logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewNode returns a new node discovery.
-func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
+func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd)
@@ -76,13 +76,13 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
+ l.Error("Error adding nodes event handler.", "err", err)
}
return n
}
func (n *Node) enqueue(obj interface{}) {
- key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
+ key, err := nodeName(obj)
if err != nil {
return
}
@@ -96,7 +96,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(n.logger).Log("msg", "node informer unable to sync cache")
+ n.logger.Error("node informer unable to sync cache")
}
return
}
@@ -133,7 +133,7 @@ func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool
}
node, err := convertToNode(o)
if err != nil {
- level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err)
+ n.logger.Error("converting to Node object failed", "err", err)
return true
}
send(ctx, ch, n.buildNode(node))
@@ -181,7 +181,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group {
addr, addrMap, err := nodeAddress(node)
if err != nil {
- level.Warn(n.logger).Log("msg", "No node address found", "err", err)
+ n.logger.Warn("No node address found", "err", err)
return nil
}
addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10))
diff --git a/discovery/kubernetes/node_test.go b/discovery/kubernetes/node_test.go
index bbf7a6b27c..bc17efdc01 100644
--- a/discovery/kubernetes/node_test.go
+++ b/discovery/kubernetes/node_test.go
@@ -56,6 +56,7 @@ func makeEnumeratedNode(i int) *v1.Node {
}
func TestNodeDiscoveryBeforeStart(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -95,6 +96,7 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
}
func TestNodeDiscoveryAdd(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -124,6 +126,7 @@ func TestNodeDiscoveryAdd(t *testing.T) {
}
func TestNodeDiscoveryDelete(t *testing.T) {
+ t.Parallel()
obj := makeEnumeratedNode(0)
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj)
@@ -142,6 +145,7 @@ func TestNodeDiscoveryDelete(t *testing.T) {
}
func TestNodeDiscoveryUpdate(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{
diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go
index 02990e415f..169c6a78a1 100644
--- a/discovery/kubernetes/pod.go
+++ b/discovery/kubernetes/pod.go
@@ -17,14 +17,14 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
@@ -44,14 +44,14 @@ type Pod struct {
nodeInf cache.SharedInformer
withNodeMetadata bool
store cache.Store
- logger log.Logger
+ logger *slog.Logger
queue *workqueue.Type
}
// NewPod creates a new pod discovery.
-func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
+func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
@@ -81,7 +81,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
+ l.Error("Error adding pods event handler.", "err", err)
}
if p.withNodeMetadata {
@@ -95,12 +95,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo
p.enqueuePodsForNode(node.Name)
},
DeleteFunc: func(o interface{}) {
- node := o.(*apiv1.Node)
- p.enqueuePodsForNode(node.Name)
+ nodeName, err := nodeName(o)
+ if err != nil {
+ l.Error("Error getting Node name", "err", err)
+ }
+ p.enqueuePodsForNode(nodeName)
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
+			l.Error("Error adding nodes event handler.", "err", err)
}
}
@@ -127,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(p.logger).Log("msg", "pod informer unable to sync cache")
+ p.logger.Error("pod informer unable to sync cache")
}
return
}
@@ -164,7 +167,7 @@ func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool
}
pod, err := convertToPod(o)
if err != nil {
- level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err)
+ p.logger.Error("converting to Pod object failed", "err", err)
return true
}
send(ctx, ch, p.buildPod(pod))
@@ -216,7 +219,7 @@ func podLabels(pod *apiv1.Pod) model.LabelSet {
podPhaseLabel: lv(string(pod.Status.Phase)),
podNodeNameLabel: lv(pod.Spec.NodeName),
podHostIPLabel: lv(pod.Status.HostIP),
- podUID: lv(string(pod.ObjectMeta.UID)),
+ podUID: lv(string(pod.UID)),
}
addObjectMetaLabels(ls, pod.ObjectMeta, RolePod)
@@ -246,7 +249,7 @@ func (p *Pod) findPodContainerStatus(statuses *[]apiv1.ContainerStatus, containe
func (p *Pod) findPodContainerID(statuses *[]apiv1.ContainerStatus, containerName string) string {
cStatus, err := p.findPodContainerStatus(statuses, containerName)
if err != nil {
- level.Debug(p.logger).Log("msg", "cannot find container ID", "err", err)
+ p.logger.Debug("cannot find container ID", "err", err)
return ""
}
return cStatus.ContainerID
@@ -315,7 +318,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
func (p *Pod) enqueuePodsForNode(nodeName string) {
pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
- level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err)
+ p.logger.Error("Error getting pods for node", "node", nodeName, "err", err)
return
}
diff --git a/discovery/kubernetes/pod_test.go b/discovery/kubernetes/pod_test.go
index 286a1a230d..7a3079a265 100644
--- a/discovery/kubernetes/pod_test.go
+++ b/discovery/kubernetes/pod_test.go
@@ -239,6 +239,7 @@ func expectedPodTargetGroupsWithNodeMeta(ns, nodeName string, nodeLabels map[str
}
func TestPodDiscoveryBeforeRun(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -302,6 +303,7 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
}
func TestPodDiscoveryInitContainer(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
ns := "default"
@@ -329,6 +331,7 @@ func TestPodDiscoveryInitContainer(t *testing.T) {
}
func TestPodDiscoveryAdd(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -343,6 +346,7 @@ func TestPodDiscoveryAdd(t *testing.T) {
}
func TestPodDiscoveryDelete(t *testing.T) {
+ t.Parallel()
obj := makePods()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)
@@ -362,6 +366,7 @@ func TestPodDiscoveryDelete(t *testing.T) {
}
func TestPodDiscoveryUpdate(t *testing.T) {
+ t.Parallel()
obj := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
@@ -403,6 +408,7 @@ func TestPodDiscoveryUpdate(t *testing.T) {
}
func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
initialPod := makePods()
@@ -427,6 +433,7 @@ func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
}
func TestPodDiscoveryNamespaces(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
expected := expectedPodTargetGroups("ns1")
@@ -448,6 +455,7 @@ func TestPodDiscoveryNamespaces(t *testing.T) {
}
func TestPodDiscoveryOwnNamespace(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RolePod, NamespaceDiscovery{IncludeOwnNamespace: true})
expected := expectedPodTargetGroups("own-ns")
@@ -466,6 +474,7 @@ func TestPodDiscoveryOwnNamespace(t *testing.T) {
}
func TestPodDiscoveryWithNodeMetadata(t *testing.T) {
+ t.Parallel()
attachMetadata := AttachMetadataConfig{Node: true}
n, c := makeDiscoveryWithMetadata(RolePod, NamespaceDiscovery{}, attachMetadata)
nodeLbls := map[string]string{"l1": "v1"}
@@ -485,6 +494,7 @@ func TestPodDiscoveryWithNodeMetadata(t *testing.T) {
}
func TestPodDiscoveryWithNodeMetadataUpdateNode(t *testing.T) {
+ t.Parallel()
nodeLbls := map[string]string{"l2": "v2"}
attachMetadata := AttachMetadataConfig{Node: true}
n, c := makeDiscoveryWithMetadata(RolePod, NamespaceDiscovery{}, attachMetadata)
diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go
index 51204a5a1a..e666497c86 100644
--- a/discovery/kubernetes/service.go
+++ b/discovery/kubernetes/service.go
@@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@@ -33,16 +33,16 @@ import (
// Service implements discovery of Kubernetes services.
type Service struct {
- logger log.Logger
+ logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewService returns a new service discovery.
-func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
+func NewService(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
@@ -71,7 +71,7 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C
},
})
if err != nil {
- level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
+ l.Error("Error adding services event handler.", "err", err)
}
return s
}
@@ -91,7 +91,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(s.logger).Log("msg", "service informer unable to sync cache")
+ s.logger.Error("service informer unable to sync cache")
}
return
}
@@ -128,7 +128,7 @@ func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) b
}
eps, err := convertToService(o)
if err != nil {
- level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err)
+ s.logger.Error("converting to Service object failed", "err", err)
return true
}
send(ctx, ch, s.buildService(eps))
diff --git a/discovery/kubernetes/service_test.go b/discovery/kubernetes/service_test.go
index dde3aaea57..8386ef296a 100644
--- a/discovery/kubernetes/service_test.go
+++ b/discovery/kubernetes/service_test.go
@@ -118,6 +118,7 @@ func makeLoadBalancerService() *v1.Service {
}
func TestServiceDiscoveryAdd(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{})
k8sDiscoveryTest{
@@ -189,6 +190,7 @@ func TestServiceDiscoveryAdd(t *testing.T) {
}
func TestServiceDiscoveryDelete(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
k8sDiscoveryTest{
@@ -207,6 +209,7 @@ func TestServiceDiscoveryDelete(t *testing.T) {
}
func TestServiceDiscoveryUpdate(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
k8sDiscoveryTest{
@@ -251,6 +254,7 @@ func TestServiceDiscoveryUpdate(t *testing.T) {
}
func TestServiceDiscoveryNamespaces(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
k8sDiscoveryTest{
@@ -303,6 +307,7 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
}
func TestServiceDiscoveryOwnNamespace(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{IncludeOwnNamespace: true})
k8sDiscoveryTest{
@@ -338,6 +343,7 @@ func TestServiceDiscoveryOwnNamespace(t *testing.T) {
}
func TestServiceDiscoveryAllNamespaces(t *testing.T) {
+ t.Parallel()
n, c := makeDiscovery(RoleService, NamespaceDiscovery{})
k8sDiscoveryTest{
diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go
deleted file mode 100644
index 6fc61485d1..0000000000
--- a/discovery/legacymanager/manager.go
+++ /dev/null
@@ -1,332 +0,0 @@
-// Copyright 2016 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package legacymanager
-
-import (
- "context"
- "fmt"
- "reflect"
- "sync"
- "time"
-
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
- "github.com/prometheus/client_golang/prometheus"
-
- "github.com/prometheus/prometheus/discovery"
- "github.com/prometheus/prometheus/discovery/targetgroup"
-)
-
-type poolKey struct {
- setName string
- provider string
-}
-
-// provider holds a Discoverer instance, its configuration and its subscribers.
-type provider struct {
- name string
- d discovery.Discoverer
- subs []string
- config interface{}
-}
-
-// NewManager is the Discovery Manager constructor.
-func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]discovery.DiscovererMetrics, options ...func(*Manager)) *Manager {
- if logger == nil {
- logger = log.NewNopLogger()
- }
- mgr := &Manager{
- logger: logger,
- syncCh: make(chan map[string][]*targetgroup.Group),
- targets: make(map[poolKey]map[string]*targetgroup.Group),
- discoverCancel: []context.CancelFunc{},
- ctx: ctx,
- updatert: 5 * time.Second,
- triggerSend: make(chan struct{}, 1),
- registerer: registerer,
- sdMetrics: sdMetrics,
- }
- for _, option := range options {
- option(mgr)
- }
-
- // Register the metrics.
- // We have to do this after setting all options, so that the name of the Manager is set.
- if metrics, err := discovery.NewManagerMetrics(registerer, mgr.name); err == nil {
- mgr.metrics = metrics
- } else {
- level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
- return nil
- }
-
- return mgr
-}
-
-// Name sets the name of the manager.
-func Name(n string) func(*Manager) {
- return func(m *Manager) {
- m.mtx.Lock()
- defer m.mtx.Unlock()
- m.name = n
- }
-}
-
-// Manager maintains a set of discovery providers and sends each update to a map channel.
-// Targets are grouped by the target set name.
-type Manager struct {
- logger log.Logger
- name string
- mtx sync.RWMutex
- ctx context.Context
- discoverCancel []context.CancelFunc
-
- // Some Discoverers(eg. k8s) send only the updates for a given target group
- // so we use map[tg.Source]*targetgroup.Group to know which group to update.
- targets map[poolKey]map[string]*targetgroup.Group
- // providers keeps track of SD providers.
- providers []*provider
- // The sync channel sends the updates as a map where the key is the job value from the scrape config.
- syncCh chan map[string][]*targetgroup.Group
-
- // How long to wait before sending updates to the channel. The variable
- // should only be modified in unit tests.
- updatert time.Duration
-
- // The triggerSend channel signals to the manager that new updates have been received from providers.
- triggerSend chan struct{}
-
- // A registerer for all service discovery metrics.
- registerer prometheus.Registerer
-
- metrics *discovery.Metrics
- sdMetrics map[string]discovery.DiscovererMetrics
-}
-
-// Run starts the background processing.
-func (m *Manager) Run() error {
- go m.sender()
- <-m.ctx.Done()
- m.cancelDiscoverers()
- return m.ctx.Err()
-}
-
-// SyncCh returns a read only channel used by all the clients to receive target updates.
-func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group {
- return m.syncCh
-}
-
-// ApplyConfig removes all running discovery providers and starts new ones using the provided config.
-func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
- m.mtx.Lock()
- defer m.mtx.Unlock()
-
- for pk := range m.targets {
- if _, ok := cfg[pk.setName]; !ok {
- m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName)
- }
- }
- m.cancelDiscoverers()
- m.targets = make(map[poolKey]map[string]*targetgroup.Group)
- m.providers = nil
- m.discoverCancel = nil
-
- failedCount := 0
- for name, scfg := range cfg {
- failedCount += m.registerProviders(scfg, name)
- m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0)
- }
- m.metrics.FailedConfigs.Set(float64(failedCount))
-
- for _, prov := range m.providers {
- m.startProvider(m.ctx, prov)
- }
-
- return nil
-}
-
-// StartCustomProvider is used for sdtool. Only use this if you know what you're doing.
-func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker discovery.Discoverer) {
- p := &provider{
- name: name,
- d: worker,
- subs: []string{name},
- }
- m.providers = append(m.providers, p)
- m.startProvider(ctx, p)
-}
-
-func (m *Manager) startProvider(ctx context.Context, p *provider) {
- level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
- ctx, cancel := context.WithCancel(ctx)
- updates := make(chan []*targetgroup.Group)
-
- m.discoverCancel = append(m.discoverCancel, cancel)
-
- go p.d.Run(ctx, updates)
- go m.updater(ctx, p, updates)
-}
-
-func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) {
- for {
- select {
- case <-ctx.Done():
- return
- case tgs, ok := <-updates:
- m.metrics.ReceivedUpdates.Inc()
- if !ok {
- level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
- return
- }
-
- for _, s := range p.subs {
- m.updateGroup(poolKey{setName: s, provider: p.name}, tgs)
- }
-
- select {
- case m.triggerSend <- struct{}{}:
- default:
- }
- }
- }
-}
-
-func (m *Manager) sender() {
- ticker := time.NewTicker(m.updatert)
- defer ticker.Stop()
-
- for {
- select {
- case <-m.ctx.Done():
- return
- case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
- select {
- case <-m.triggerSend:
- m.metrics.SentUpdates.Inc()
- select {
- case m.syncCh <- m.allGroups():
- default:
- m.metrics.DelayedUpdates.Inc()
- level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
- select {
- case m.triggerSend <- struct{}{}:
- default:
- }
- }
- default:
- }
- }
- }
-}
-
-func (m *Manager) cancelDiscoverers() {
- for _, c := range m.discoverCancel {
- c()
- }
-}
-
-func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) {
- m.mtx.Lock()
- defer m.mtx.Unlock()
-
- if _, ok := m.targets[poolKey]; !ok {
- m.targets[poolKey] = make(map[string]*targetgroup.Group)
- }
- for _, tg := range tgs {
- if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics.
- m.targets[poolKey][tg.Source] = tg
- }
- }
-}
-
-func (m *Manager) allGroups() map[string][]*targetgroup.Group {
- m.mtx.RLock()
- defer m.mtx.RUnlock()
-
- tSets := map[string][]*targetgroup.Group{}
- n := map[string]int{}
- for pkey, tsets := range m.targets {
- for _, tg := range tsets {
- // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager'
- // to signal that it needs to stop all scrape loops for this target set.
- tSets[pkey.setName] = append(tSets[pkey.setName], tg)
- n[pkey.setName] += len(tg.Targets)
- }
- }
- for setName, v := range n {
- m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
- }
- return tSets
-}
-
-// registerProviders returns a number of failed SD config.
-func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int {
- var (
- failed int
- added bool
- )
- add := func(cfg discovery.Config) {
- for _, p := range m.providers {
- if reflect.DeepEqual(cfg, p.config) {
- p.subs = append(p.subs, setName)
- added = true
- return
- }
- }
- typ := cfg.Name()
- d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{
- Logger: log.With(m.logger, "discovery", typ, "config", setName),
- Metrics: m.sdMetrics[typ],
- })
- if err != nil {
- level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
- failed++
- return
- }
- m.providers = append(m.providers, &provider{
- name: fmt.Sprintf("%s/%d", typ, len(m.providers)),
- d: d,
- config: cfg,
- subs: []string{setName},
- })
- added = true
- }
- for _, cfg := range cfgs {
- add(cfg)
- }
- if !added {
- // Add an empty target group to force the refresh of the corresponding
- // scrape pool and to notify the receiver that this target set has no
- // current targets.
- // It can happen because the combined set of SD configurations is empty
- // or because we fail to instantiate all the SD configurations.
- add(discovery.StaticConfig{{}})
- }
- return failed
-}
-
-// StaticProvider holds a list of target groups that never change.
-type StaticProvider struct {
- TargetGroups []*targetgroup.Group
-}
-
-// Run implements the Worker interface.
-func (sd *StaticProvider) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
- // We still have to consider that the consumer exits right away in which case
- // the context will be canceled.
- select {
- case ch <- sd.TargetGroups:
- case <-ctx.Done():
- }
- close(ch)
-}
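
For orientation while reading the deletion above: the legacy manager used the same provider contract as the current discovery manager — a Discoverer pushes []*targetgroup.Group batches into a channel, and consumers drain the manager's sync channel. A minimal, hedged sketch of that contract, built only from the types and signatures visible in this diff (the wiring is illustrative, not the scrape manager's real code):

package main

import (
    "context"
    "fmt"

    "github.com/prometheus/common/model"

    "github.com/prometheus/prometheus/discovery/targetgroup"
)

// oneShot mimics the StaticProvider removed above: it sends its groups once,
// then waits for cancellation.
type oneShot struct {
    tgs []*targetgroup.Group
}

func (o oneShot) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
    select {
    case ch <- o.tgs:
    case <-ctx.Done():
    }
    <-ctx.Done()
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    updates := make(chan []*targetgroup.Group)
    p := oneShot{tgs: []*targetgroup.Group{
        {Source: "example", Targets: []model.LabelSet{{model.AddressLabel: "foo:9090"}}},
    }}
    go p.Run(ctx, updates)

    // A consumer drains updates the same way the manager's updater loop does.
    for _, tg := range <-updates {
        fmt.Println(tg.Source, len(tg.Targets))
    }
}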
diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go
deleted file mode 100644
index a455a8e341..0000000000
--- a/discovery/legacymanager/manager_test.go
+++ /dev/null
@@ -1,1186 +0,0 @@
-// Copyright 2016 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package legacymanager
-
-import (
- "context"
- "fmt"
- "sort"
- "strconv"
- "testing"
- "time"
-
- "github.com/go-kit/log"
- "github.com/prometheus/client_golang/prometheus"
- client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
- "github.com/prometheus/common/model"
- "github.com/stretchr/testify/require"
-
- "github.com/prometheus/prometheus/discovery"
- "github.com/prometheus/prometheus/discovery/targetgroup"
- "github.com/prometheus/prometheus/util/testutil"
-)
-
-func TestMain(m *testing.M) {
- testutil.TolerantVerifyLeak(m)
-}
-
-func newTestMetrics(t *testing.T, reg prometheus.Registerer) (*discovery.RefreshMetricsManager, map[string]discovery.DiscovererMetrics) {
- refreshMetrics := discovery.NewRefreshMetrics(reg)
- sdMetrics, err := discovery.RegisterSDMetrics(reg, refreshMetrics)
- require.NoError(t, err)
- return &refreshMetrics, sdMetrics
-}
-
-// TestTargetUpdatesOrder checks that the target updates are received in the expected order.
-func TestTargetUpdatesOrder(t *testing.T) {
- // The order by which the updates are send is determined by the interval passed to the mock discovery adapter
- // Final targets array is ordered alphabetically by the name of the discoverer.
- // For example discoverer "A" with targets "t2,t3" and discoverer "B" with targets "t1,t2" will result in "t2,t3,t1,t2" after the merge.
- testCases := []struct {
- title string
- updates map[string][]update
- expectedTargets [][]*targetgroup.Group
- }{
- {
- title: "Single TP no updates",
- updates: map[string][]update{
- "tp1": {},
- },
- expectedTargets: nil,
- },
- {
- title: "Multiple TPs no updates",
- updates: map[string][]update{
- "tp1": {},
- "tp2": {},
- "tp3": {},
- },
- expectedTargets: nil,
- },
- {
- title: "Single TP empty initials",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{},
- interval: 5 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {},
- },
- },
- {
- title: "Multiple TPs empty initials",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{},
- interval: 5 * time.Millisecond,
- },
- },
- "tp2": {
- {
- targetGroups: []targetgroup.Group{},
- interval: 200 * time.Millisecond,
- },
- },
- "tp3": {
- {
- targetGroups: []targetgroup.Group{},
- interval: 100 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {},
- {},
- {},
- },
- },
- {
- title: "Single TP initials only",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- },
- {
- title: "Multiple TPs initials only",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- },
- "tp2": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- },
- interval: 10 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- }, {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- },
- },
- },
- {
- title: "Single TP initials followed by empty updates",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- interval: 0,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{},
- },
- },
- interval: 10 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{},
- },
- },
- },
- },
- {
- title: "Single TP initials and new groups",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- interval: 0,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- {
- Source: "tp1_group3",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- interval: 10 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- {
- Source: "tp1_group3",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- },
- },
- {
- title: "Multiple TPs initials and new groups",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- interval: 10 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group3",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group4",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- },
- interval: 500 * time.Millisecond,
- },
- },
- "tp2": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- },
- interval: 100 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp2_group3",
- Targets: []model.LabelSet{{"__instance__": "7"}},
- },
- {
- Source: "tp2_group4",
- Targets: []model.LabelSet{{"__instance__": "8"}},
- },
- },
- interval: 10 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- {
- Source: "tp2_group3",
- Targets: []model.LabelSet{{"__instance__": "7"}},
- },
- {
- Source: "tp2_group4",
- Targets: []model.LabelSet{{"__instance__": "8"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- {
- Source: "tp1_group3",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group4",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- {
- Source: "tp2_group3",
- Targets: []model.LabelSet{{"__instance__": "7"}},
- },
- {
- Source: "tp2_group4",
- Targets: []model.LabelSet{{"__instance__": "8"}},
- },
- },
- },
- },
- {
- title: "One TP initials arrive after other TP updates.",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- interval: 10 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- },
- interval: 150 * time.Millisecond,
- },
- },
- "tp2": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- },
- interval: 200 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "7"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "8"}},
- },
- },
- interval: 100 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "5"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "6"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- {
- Source: "tp2_group1",
- Targets: []model.LabelSet{{"__instance__": "7"}},
- },
- {
- Source: "tp2_group2",
- Targets: []model.LabelSet{{"__instance__": "8"}},
- },
- },
- },
- },
-
- {
- title: "Single TP empty update in between",
- updates: map[string][]update{
- "tp1": {
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- interval: 30 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{},
- },
- },
- interval: 10 * time.Millisecond,
- },
- {
- targetGroups: []targetgroup.Group{
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- },
- interval: 300 * time.Millisecond,
- },
- },
- },
- expectedTargets: [][]*targetgroup.Group{
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{},
- },
- },
- {
- {
- Source: "tp1_group1",
- Targets: []model.LabelSet{{"__instance__": "3"}},
- },
- {
- Source: "tp1_group2",
- Targets: []model.LabelSet{{"__instance__": "4"}},
- },
- },
- },
- },
- }
-
- for i, tc := range testCases {
- tc := tc
- t.Run(tc.title, func(t *testing.T) {
- ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
-
- var totalUpdatesCount int
- for _, up := range tc.updates {
- if len(up) > 0 {
- totalUpdatesCount += len(up)
- }
- }
- provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount)
-
- for _, up := range tc.updates {
- go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
- }
-
- for x := 0; x < totalUpdatesCount; x++ {
- select {
- case <-ctx.Done():
- t.Fatalf("%d: no update arrived within the timeout limit", x)
- case tgs := <-provUpdates:
- discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs)
- for _, got := range discoveryManager.allGroups() {
- assertEqualGroups(t, got, tc.expectedTargets[x])
- }
- }
- }
- })
- }
-}
-
-func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group) {
- t.Helper()
-
- // Need to sort by the groups's source as the received order is not guaranteed.
- sort.Sort(byGroupSource(got))
- sort.Sort(byGroupSource(expected))
-
- require.Equal(t, expected, got)
-}
-
-func staticConfig(addrs ...string) discovery.StaticConfig {
- var cfg discovery.StaticConfig
- for i, addr := range addrs {
- cfg = append(cfg, &targetgroup.Group{
- Source: strconv.Itoa(i),
- Targets: []model.LabelSet{
- {model.AddressLabel: model.LabelValue(addr)},
- },
- })
- }
- return cfg
-}
-
-func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) {
- t.Helper()
- if _, ok := tSets[poolKey]; !ok {
- t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets)
- }
-
- match := false
- var mergedTargets string
- for _, targetGroup := range tSets[poolKey] {
- for _, l := range targetGroup.Targets {
- mergedTargets = mergedTargets + " " + l.String()
- if l.String() == label {
- match = true
- }
- }
- }
- if match != present {
- msg := ""
- if !present {
- msg = "not"
- }
- t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets)
- }
-}
-
-func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- c := map[string]discovery.Configs{
- "prometheus": {
- staticConfig("foo:9090", "bar:9090"),
- },
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true)
-
- c["prometheus"] = discovery.Configs{
- staticConfig("foo:9090"),
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", false)
-}
-
-func TestDiscovererConfigs(t *testing.T) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- c := map[string]discovery.Configs{
- "prometheus": {
- staticConfig("foo:9090", "bar:9090"),
- staticConfig("baz:9090"),
- },
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true)
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/1"}, "{__address__=\"baz:9090\"}", true)
-}
-
-// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
-// removing all targets from the static_configs sends an update with empty targetGroups.
-// This is required to signal the receiver that this target set has no current targets.
-func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- c := map[string]discovery.Configs{
- "prometheus": {
- staticConfig("foo:9090"),
- },
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
-
- c["prometheus"] = discovery.Configs{
- discovery.StaticConfig{{}},
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
-
- pkey := poolKey{setName: "prometheus", provider: "static/0"}
- targetGroups, ok := discoveryManager.targets[pkey]
- if !ok {
- t.Fatalf("'%v' should be present in target groups", pkey)
- }
- group, ok := targetGroups[""]
- if !ok {
- t.Fatalf("missing '' key in target groups %v", targetGroups)
- }
-
- if len(group.Targets) != 0 {
- t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets))
- }
-}
-
-func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, nil, reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- c := map[string]discovery.Configs{
- "prometheus": {
- staticConfig("foo:9090"),
- },
- "prometheus2": {
- staticConfig("foo:9090"),
- },
- }
- discoveryManager.ApplyConfig(c)
-
- <-discoveryManager.SyncCh()
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
- verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus2", provider: "static/0"}, "{__address__=\"foo:9090\"}", true)
- if len(discoveryManager.providers) != 1 {
- t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers))
- }
-}
-
-func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
- originalConfig := discovery.Configs{
- staticConfig("foo:9090", "bar:9090", "baz:9090"),
- }
- processedConfig := discovery.Configs{
- staticConfig("foo:9090", "bar:9090", "baz:9090"),
- }
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- cfgs := map[string]discovery.Configs{
- "prometheus": processedConfig,
- }
- discoveryManager.ApplyConfig(cfgs)
- <-discoveryManager.SyncCh()
-
- for _, cfg := range cfgs {
- require.Equal(t, originalConfig, cfg)
- }
-}
-
-type errorConfig struct{ err error }
-
-func (e errorConfig) Name() string { return "error" }
-func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return nil, e.err
-}
-
-// NewDiscovererMetrics implements discovery.Config.
-func (errorConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
- return &discovery.NoopDiscovererMetrics{}
-}
-
-func TestGaugeFailedConfigs(t *testing.T) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
- require.NotNil(t, discoveryManager)
- discoveryManager.updatert = 100 * time.Millisecond
- go discoveryManager.Run()
-
- c := map[string]discovery.Configs{
- "prometheus": {
- errorConfig{fmt.Errorf("tests error 0")},
- errorConfig{fmt.Errorf("tests error 1")},
- errorConfig{fmt.Errorf("tests error 2")},
- },
- }
- discoveryManager.ApplyConfig(c)
- <-discoveryManager.SyncCh()
-
- failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
- if failedCount != 3 {
- t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
- }
-
- c["prometheus"] = discovery.Configs{
- staticConfig("foo:9090"),
- }
- discoveryManager.ApplyConfig(c)
- <-discoveryManager.SyncCh()
-
- failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
- if failedCount != 0 {
- t.Fatalf("Expected to get no failed config, got: %v", failedCount)
- }
-}
-
-func TestCoordinationWithReceiver(t *testing.T) {
- updateDelay := 100 * time.Millisecond
-
- type expect struct {
- delay time.Duration
- tgs map[string][]*targetgroup.Group
- }
-
- testCases := []struct {
- title string
- providers map[string]discovery.Discoverer
- expected []expect
- }{
- {
- title: "Receiver should get all updates even when one provider closes its channel",
- providers: map[string]discovery.Discoverer{
- "once1": &onceProvider{
- tgs: []*targetgroup.Group{
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- },
- "mock1": newMockDiscoveryProvider(
- update{
- interval: 2 * updateDelay,
- targetGroups: []targetgroup.Group{
- {
- Source: "tg2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- ),
- },
- expected: []expect{
- {
- tgs: map[string][]*targetgroup.Group{
- "once1": {
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- },
- },
- {
- tgs: map[string][]*targetgroup.Group{
- "once1": {
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- "mock1": {
- {
- Source: "tg2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- },
- },
- },
- {
- title: "Receiver should get all updates even when the channel is blocked",
- providers: map[string]discovery.Discoverer{
- "mock1": newMockDiscoveryProvider(
- update{
- targetGroups: []targetgroup.Group{
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- },
- update{
- interval: 4 * updateDelay,
- targetGroups: []targetgroup.Group{
- {
- Source: "tg2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- ),
- },
- expected: []expect{
- {
- delay: 2 * updateDelay,
- tgs: map[string][]*targetgroup.Group{
- "mock1": {
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- },
- },
- },
- {
- delay: 4 * updateDelay,
- tgs: map[string][]*targetgroup.Group{
- "mock1": {
- {
- Source: "tg1",
- Targets: []model.LabelSet{{"__instance__": "1"}},
- },
- {
- Source: "tg2",
- Targets: []model.LabelSet{{"__instance__": "2"}},
- },
- },
- },
- },
- },
- },
- }
-
- for _, tc := range testCases {
- tc := tc
- t.Run(tc.title, func(t *testing.T) {
- ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
- defer cancel()
-
- reg := prometheus.NewRegistry()
- _, sdMetrics := newTestMetrics(t, reg)
-
- mgr := NewManager(ctx, nil, reg, sdMetrics)
- require.NotNil(t, mgr)
- mgr.updatert = updateDelay
- go mgr.Run()
-
- for name, p := range tc.providers {
- mgr.StartCustomProvider(ctx, name, p)
- }
-
- for i, expected := range tc.expected {
- time.Sleep(expected.delay)
- select {
- case <-ctx.Done():
- t.Fatalf("step %d: no update received in the expected timeframe", i)
- case tgs, ok := <-mgr.SyncCh():
- if !ok {
- t.Fatalf("step %d: discovery manager channel is closed", i)
- }
- if len(tgs) != len(expected.tgs) {
- t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v",
- i, len(tgs), len(expected.tgs), tgs, expected.tgs)
- }
- for k := range expected.tgs {
- if _, ok := tgs[k]; !ok {
- t.Fatalf("step %d: target group not found: %s\ngot: %#v", i, k, tgs)
- }
- assertEqualGroups(t, tgs[k], expected.tgs[k])
- }
- }
- }
- })
- }
-}
-
-type update struct {
- targetGroups []targetgroup.Group
- interval time.Duration
-}
-
-type mockdiscoveryProvider struct {
- updates []update
-}
-
-func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider {
- tp := mockdiscoveryProvider{
- updates: updates,
- }
- return tp
-}
-
-func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) {
- for _, u := range tp.updates {
- if u.interval > 0 {
- select {
- case <-ctx.Done():
- return
- case <-time.After(u.interval):
- }
- }
- tgs := make([]*targetgroup.Group, len(u.targetGroups))
- for i := range u.targetGroups {
- tgs[i] = &u.targetGroups[i]
- }
- upCh <- tgs
- }
- <-ctx.Done()
-}
-
-// byGroupSource implements sort.Interface so we can sort by the Source field.
-type byGroupSource []*targetgroup.Group
-
-func (a byGroupSource) Len() int { return len(a) }
-func (a byGroupSource) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source }
-
-// onceProvider sends updates once (if any) and closes the update channel.
-type onceProvider struct {
- tgs []*targetgroup.Group
-}
-
-func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) {
- if len(o.tgs) > 0 {
- ch <- o.tgs
- }
- close(ch)
-}
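
A side note on the deleted test helpers: assertEqualGroups normalizes group order through the byGroupSource sort.Interface implementation above before comparing. The same normalization can be written more compactly with sort.Slice; this is an illustrative equivalent, not code from the repository:

package main

import (
    "fmt"
    "sort"

    "github.com/prometheus/prometheus/discovery/targetgroup"
)

// sortBySource puts groups into a deterministic order before comparison,
// mirroring what the byGroupSource type achieved.
func sortBySource(groups []*targetgroup.Group) {
    sort.Slice(groups, func(i, j int) bool {
        return groups[i].Source < groups[j].Source
    })
}

func main() {
    got := []*targetgroup.Group{{Source: "tp1_group2"}, {Source: "tp1_group1"}}
    sortBySource(got)
    fmt.Println(got[0].Source, got[1].Source) // tp1_group1 tp1_group2
}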
diff --git a/discovery/legacymanager/registry.go b/discovery/legacymanager/registry.go
deleted file mode 100644
index 955705394d..0000000000
--- a/discovery/legacymanager/registry.go
+++ /dev/null
@@ -1,261 +0,0 @@
-// Copyright 2020 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package legacymanager
-
-import (
- "errors"
- "fmt"
- "reflect"
- "sort"
- "strconv"
- "strings"
- "sync"
-
- "gopkg.in/yaml.v2"
-
- "github.com/prometheus/prometheus/discovery"
- "github.com/prometheus/prometheus/discovery/targetgroup"
-)
-
-const (
- configFieldPrefix = "AUTO_DISCOVERY_"
- staticConfigsKey = "static_configs"
- staticConfigsFieldName = configFieldPrefix + staticConfigsKey
-)
-
-var (
- configNames = make(map[string]discovery.Config)
- configFieldNames = make(map[reflect.Type]string)
- configFields []reflect.StructField
-
- configTypesMu sync.Mutex
- configTypes = make(map[reflect.Type]reflect.Type)
-
- emptyStructType = reflect.TypeOf(struct{}{})
- configsType = reflect.TypeOf(discovery.Configs{})
-)
-
-// RegisterConfig registers the given Config type for YAML marshaling and unmarshaling.
-func RegisterConfig(config discovery.Config) {
- registerConfig(config.Name()+"_sd_configs", reflect.TypeOf(config), config)
-}
-
-func init() {
- // N.B.: static_configs is the only Config type implemented by default.
- // All other types are registered at init by their implementing packages.
- elemTyp := reflect.TypeOf(&targetgroup.Group{})
- registerConfig(staticConfigsKey, elemTyp, discovery.StaticConfig{})
-}
-
-func registerConfig(yamlKey string, elemType reflect.Type, config discovery.Config) {
- name := config.Name()
- if _, ok := configNames[name]; ok {
- panic(fmt.Sprintf("discovery: Config named %q is already registered", name))
- }
- configNames[name] = config
-
- fieldName := configFieldPrefix + yamlKey // Field must be exported.
- configFieldNames[elemType] = fieldName
-
- // Insert fields in sorted order.
- i := sort.Search(len(configFields), func(k int) bool {
- return fieldName < configFields[k].Name
- })
- configFields = append(configFields, reflect.StructField{}) // Add empty field at end.
- copy(configFields[i+1:], configFields[i:]) // Shift fields to the right.
- configFields[i] = reflect.StructField{ // Write new field in place.
- Name: fieldName,
- Type: reflect.SliceOf(elemType),
- Tag: reflect.StructTag(`yaml:"` + yamlKey + `,omitempty"`),
- }
-}
-
-func getConfigType(out reflect.Type) reflect.Type {
- configTypesMu.Lock()
- defer configTypesMu.Unlock()
- if typ, ok := configTypes[out]; ok {
- return typ
- }
- // Initial exported fields map one-to-one.
- var fields []reflect.StructField
- for i, n := 0, out.NumField(); i < n; i++ {
- switch field := out.Field(i); {
- case field.PkgPath == "" && field.Type != configsType:
- fields = append(fields, field)
- default:
- fields = append(fields, reflect.StructField{
- Name: "_" + field.Name, // Field must be unexported.
- PkgPath: out.PkgPath(),
- Type: emptyStructType,
- })
- }
- }
- // Append extra config fields on the end.
- fields = append(fields, configFields...)
- typ := reflect.StructOf(fields)
- configTypes[out] = typ
- return typ
-}
-
-// UnmarshalYAMLWithInlineConfigs helps implement yaml.Unmarshal for structs
-// that have a Configs field that should be inlined.
-func UnmarshalYAMLWithInlineConfigs(out interface{}, unmarshal func(interface{}) error) error {
- outVal := reflect.ValueOf(out)
- if outVal.Kind() != reflect.Ptr {
- return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out)
- }
- outVal = outVal.Elem()
- if outVal.Kind() != reflect.Struct {
- return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out)
- }
- outTyp := outVal.Type()
-
- cfgTyp := getConfigType(outTyp)
- cfgPtr := reflect.New(cfgTyp)
- cfgVal := cfgPtr.Elem()
-
- // Copy shared fields (defaults) to dynamic value.
- var configs *discovery.Configs
- for i, n := 0, outVal.NumField(); i < n; i++ {
- if outTyp.Field(i).Type == configsType {
- configs = outVal.Field(i).Addr().Interface().(*discovery.Configs)
- continue
- }
- if cfgTyp.Field(i).PkgPath != "" {
- continue // Field is unexported: ignore.
- }
- cfgVal.Field(i).Set(outVal.Field(i))
- }
- if configs == nil {
- return fmt.Errorf("discovery: Configs field not found in type: %T", out)
- }
-
- // Unmarshal into dynamic value.
- if err := unmarshal(cfgPtr.Interface()); err != nil {
- return replaceYAMLTypeError(err, cfgTyp, outTyp)
- }
-
- // Copy shared fields from dynamic value.
- for i, n := 0, outVal.NumField(); i < n; i++ {
- if cfgTyp.Field(i).PkgPath != "" {
- continue // Field is unexported: ignore.
- }
- outVal.Field(i).Set(cfgVal.Field(i))
- }
-
- var err error
- *configs, err = readConfigs(cfgVal, outVal.NumField())
- return err
-}
-
-func readConfigs(structVal reflect.Value, startField int) (discovery.Configs, error) {
- var (
- configs discovery.Configs
- targets []*targetgroup.Group
- )
- for i, n := startField, structVal.NumField(); i < n; i++ {
- field := structVal.Field(i)
- if field.Kind() != reflect.Slice {
- panic("discovery: internal error: field is not a slice")
- }
- for k := 0; k < field.Len(); k++ {
- val := field.Index(k)
- if val.IsZero() || (val.Kind() == reflect.Ptr && val.Elem().IsZero()) {
- key := configFieldNames[field.Type().Elem()]
- key = strings.TrimPrefix(key, configFieldPrefix)
- return nil, fmt.Errorf("empty or null section in %s", key)
- }
- switch c := val.Interface().(type) {
- case *targetgroup.Group:
- // Add index to the static config target groups for unique identification
- // within scrape pool.
- c.Source = strconv.Itoa(len(targets))
- // Coalesce multiple static configs into a single static config.
- targets = append(targets, c)
- case discovery.Config:
- configs = append(configs, c)
- default:
- panic("discovery: internal error: slice element is not a Config")
- }
- }
- }
- if len(targets) > 0 {
- configs = append(configs, discovery.StaticConfig(targets))
- }
- return configs, nil
-}
-
-// MarshalYAMLWithInlineConfigs helps implement yaml.Marshal for structs
-// that have a Configs field that should be inlined.
-func MarshalYAMLWithInlineConfigs(in interface{}) (interface{}, error) {
- inVal := reflect.ValueOf(in)
- for inVal.Kind() == reflect.Ptr {
- inVal = inVal.Elem()
- }
- inTyp := inVal.Type()
-
- cfgTyp := getConfigType(inTyp)
- cfgPtr := reflect.New(cfgTyp)
- cfgVal := cfgPtr.Elem()
-
- // Copy shared fields to dynamic value.
- var configs *discovery.Configs
- for i, n := 0, inTyp.NumField(); i < n; i++ {
- if inTyp.Field(i).Type == configsType {
- configs = inVal.Field(i).Addr().Interface().(*discovery.Configs)
- }
- if cfgTyp.Field(i).PkgPath != "" {
- continue // Field is unexported: ignore.
- }
- cfgVal.Field(i).Set(inVal.Field(i))
- }
- if configs == nil {
- return nil, fmt.Errorf("discovery: Configs field not found in type: %T", in)
- }
-
- if err := writeConfigs(cfgVal, *configs); err != nil {
- return nil, err
- }
-
- return cfgPtr.Interface(), nil
-}
-
-func writeConfigs(structVal reflect.Value, configs discovery.Configs) error {
- targets := structVal.FieldByName(staticConfigsFieldName).Addr().Interface().(*[]*targetgroup.Group)
- for _, c := range configs {
- if sc, ok := c.(discovery.StaticConfig); ok {
- *targets = append(*targets, sc...)
- continue
- }
- fieldName, ok := configFieldNames[reflect.TypeOf(c)]
- if !ok {
- return fmt.Errorf("discovery: cannot marshal unregistered Config type: %T", c)
- }
- field := structVal.FieldByName(fieldName)
- field.Set(reflect.Append(field, reflect.ValueOf(c)))
- }
- return nil
-}
-
-func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error {
- var e *yaml.TypeError
- if errors.As(err, &e) {
- oldStr := oldTyp.String()
- newStr := newTyp.String()
- for i, s := range e.Errors {
- e.Errors[i] = strings.ReplaceAll(s, oldStr, newStr)
- }
- }
- return err
-}
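
The registry removed above duplicated the one in the discovery package: each SD package registers its Config type once, which turns `<name>_sd_configs` into a recognized YAML key via the synthetic struct fields built here. A hedged sketch of what such a registration typically looks like in an SD package — the package name, fields, and error text are invented for illustration; the three interface methods match the discovery.Config usage visible elsewhere in this diff:

package mysd

import (
    "errors"

    "github.com/prometheus/client_golang/prometheus"

    "github.com/prometheus/prometheus/discovery"
)

// SDConfig is a hypothetical SD configuration; real SD packages follow the
// same pattern with their own fields.
type SDConfig struct {
    RefreshInterval int `yaml:"refresh_interval,omitempty"`
}

func init() {
    // Registering the zero value makes `mysd_sd_configs` a recognized key in
    // scrape configuration YAML.
    discovery.RegisterConfig(&SDConfig{})
}

func (*SDConfig) Name() string { return "mysd" }

func (*SDConfig) NewDiscovererMetrics(prometheus.Registerer, discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
    return &discovery.NoopDiscovererMetrics{}
}

func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
    // A real implementation would construct and return its Discoverer here.
    return nil, errors.New("not implemented in this sketch")
}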
diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go
index 634a6b1d4b..033025f840 100644
--- a/discovery/linode/linode.go
+++ b/discovery/linode/linode.go
@@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/linode/linodego"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -138,10 +138,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*linodeMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
@@ -165,7 +165,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
Timeout: time.Duration(conf.RefreshInterval),
},
)
- client.SetUserAgent(fmt.Sprintf("Prometheus/%s", version.Version))
+ client.SetUserAgent(version.PrometheusUserAgent())
d.client = &client
d.Discovery = refresh.NewDiscovery(
@@ -194,13 +194,12 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
events, err := d.client.ListEvents(ctx, &eventsOpts)
if err != nil {
var e *linodego.Error
- if errors.As(err, &e) && e.Code == http.StatusUnauthorized {
- // If we get a 401, the token doesn't have `events:read_only` scope.
- // Disable event polling and fallback to doing a full refresh every interval.
- d.eventPollingEnabled = false
- } else {
+ if !errors.As(err, &e) || e.Code != http.StatusUnauthorized {
return nil, err
}
+ // If we get a 401, the token doesn't have `events:read_only` scope.
+ // Disable event polling and fall back to doing a full refresh every interval.
+ d.eventPollingEnabled = false
} else {
// Event polling tells us changes the Linode API is aware of. Actions issued outside of the Linode API,
// such as issuing a `shutdown` at the VM's console instead of using the API to power off an instance,
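
The reordered branch above preserves behaviour: only an unauthorized linodego error downgrades the discovery to plain periodic refreshes; any other error still aborts the refresh. A standalone sketch of that control flow, with the surrounding Discovery state reduced to return values (hedged; only the errors.As/Code check is taken from the diff):

package main

import (
    "errors"
    "fmt"
    "net/http"

    "github.com/linode/linodego"
)

// handleEventsError mirrors the branch above: a 401 means the token lacks the
// `events:read_only` scope, so event polling is disabled instead of failing.
func handleEventsError(err error) (eventPollingEnabled bool, fatal error) {
    var e *linodego.Error
    if !errors.As(err, &e) || e.Code != http.StatusUnauthorized {
        return true, err
    }
    return false, nil
}

func main() {
    polling, fatal := handleEventsError(&linodego.Error{Code: http.StatusUnauthorized})
    fmt.Println(polling, fatal) // false <nil>: polling is disabled, the refresh continues
}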
diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go
index 3c10650653..7bcaa05ba4 100644
--- a/discovery/linode/linode_test.go
+++ b/discovery/linode/linode_test.go
@@ -19,10 +19,10 @@ import (
"net/url"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -238,7 +238,7 @@ func TestLinodeSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Endpoint())
require.NoError(t, err)
diff --git a/discovery/manager.go b/discovery/manager.go
index 897d7d151c..24950d9d59 100644
--- a/discovery/manager.go
+++ b/discovery/manager.go
@@ -16,14 +16,14 @@ package discovery
import (
"context"
"fmt"
+ "log/slog"
"reflect"
"sync"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@@ -57,6 +57,8 @@ func (p *Provider) Discoverer() Discoverer {
// IsStarted return true if Discoverer is started.
func (p *Provider) IsStarted() bool {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
return p.cancel != nil
}
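
IsStarted now reads cancel under the provider's read lock. Later hunks in this file read cancel directly while already holding that lock instead of calling IsStarted, to avoid re-acquiring it. A minimal sketch of the guarded-accessor pattern with the field names used in this diff (the struct here is a stand-in, not the real Provider):

package main

import (
    "context"
    "fmt"
    "sync"
)

type provider struct {
    mu     sync.RWMutex
    cancel context.CancelFunc
}

// isStarted mirrors Provider.IsStarted: cancel is non-nil only while the
// provider is running, and reading it must happen under the lock.
func (p *provider) isStarted() bool {
    p.mu.RLock()
    defer p.mu.RUnlock()
    return p.cancel != nil
}

func main() {
    p := &provider{}
    fmt.Println(p.isStarted()) // false until a start routine sets cancel
}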
@@ -64,7 +66,7 @@ func (p *Provider) Config() interface{} {
return p.config
}
-// Registers the metrics needed for SD mechanisms.
+// CreateAndRegisterSDMetrics registers the metrics needed for SD mechanisms.
// Does not register the metrics for the Discovery Manager.
// TODO(ptodev): Add ability to unregister the metrics?
func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]DiscovererMetrics, error) {
@@ -81,9 +83,9 @@ func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]Discovere
}
// NewManager is the Discovery Manager constructor.
-func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager {
+func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager {
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
mgr := &Manager{
logger: logger,
@@ -101,12 +103,12 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re
// Register the metrics.
// We have to do this after setting all options, so that the name of the Manager is set.
- if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil {
- mgr.metrics = metrics
- } else {
- level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
+ metrics, err := NewManagerMetrics(registerer, mgr.name)
+ if err != nil {
+ logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
+ mgr.metrics = metrics
return mgr
}
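
With this hunk the manager takes a *slog.Logger, nil still falls back to a no-op logger, and a metrics registration failure surfaces as a nil manager. A hedged usage sketch based on the signatures that appear in this diff:

package main

import (
    "context"
    "log"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/common/promslog"

    "github.com/prometheus/prometheus/discovery"
)

func main() {
    reg := prometheus.NewRegistry()

    // CreateAndRegisterSDMetrics is shown earlier in this file's diff; it
    // registers the per-SD metrics the manager needs.
    sdMetrics, err := discovery.CreateAndRegisterSDMetrics(reg)
    if err != nil {
        log.Fatal(err)
    }

    mgr := discovery.NewManager(context.Background(), promslog.NewNopLogger(), reg, sdMetrics)
    if mgr == nil {
        // NewManager returns nil when its own metrics cannot be registered.
        log.Fatal("failed to create discovery manager")
    }
}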
@@ -141,7 +143,7 @@ func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) {
// Manager maintains a set of discovery providers and sends each update to a map channel.
// Targets are grouped by the target set name.
type Manager struct {
- logger log.Logger
+ logger *slog.Logger
name string
httpOpts []config.HTTPClientOption
mtx sync.RWMutex
@@ -212,29 +214,33 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
m.metrics.FailedConfigs.Set(float64(failedCount))
var (
- wg sync.WaitGroup
- // keep shows if we keep any providers after reload.
- keep bool
+ wg sync.WaitGroup
newProviders []*Provider
)
for _, prov := range m.providers {
- // Cancel obsolete providers.
- if len(prov.newSubs) == 0 {
+ // Cancel an obsolete provider if it has no new subs and still has a cancel function.
+ // prov.cancel != nil is the same check that IsStarted() performs, but we don't call IsStarted
+ // here because it would take a lock, and we take the same lock ourselves for the other reads below.
+ prov.mu.RLock()
+ if len(prov.newSubs) == 0 && prov.cancel != nil {
wg.Add(1)
prov.done = func() {
wg.Done()
}
+
prov.cancel()
+ prov.mu.RUnlock()
continue
}
+ prov.mu.RUnlock()
+
newProviders = append(newProviders, prov)
- // refTargets keeps reference targets used to populate new subs' targets
+ // refTargets keeps the reference targets used to populate the new subs' targets, since they should be the same.
var refTargets map[string]*targetgroup.Group
prov.mu.Lock()
m.targetsMtx.Lock()
for s := range prov.subs {
- keep = true
refTargets = m.targets[poolKey{s, prov.name}]
// Remove obsolete subs' targets.
if _, ok := prov.newSubs[s]; !ok {
@@ -267,7 +273,9 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
// While startProvider does pull the trigger, it may take some time to do so, therefore
// we pull the trigger as soon as possible so that downstream managers can populate their state.
// See https://github.com/prometheus/prometheus/pull/8639 for details.
- if keep {
+ // This also helps the downstream managers drop stale targets as soon as possible.
+ // See https://github.com/prometheus/prometheus/pull/13147 for details.
+ if len(m.providers) > 0 {
select {
case m.triggerSend <- struct{}{}:
default:
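
The trigger channel written here has capacity one and is written with a non-blocking send, so repeated notifications coalesce and ApplyConfig never blocks on a busy sender loop. A standalone sketch of the pattern:

package main

import "fmt"

func main() {
    triggerSend := make(chan struct{}, 1)

    notify := func() {
        select {
        case triggerSend <- struct{}{}:
        default: // A notification is already pending; coalesce.
        }
    }

    notify()
    notify() // Coalesced: the buffer already holds one signal.

    fmt.Println(len(triggerSend)) // 1
}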
@@ -288,16 +296,20 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D
name: {},
},
}
+ m.mtx.Lock()
m.providers = append(m.providers, p)
+ m.mtx.Unlock()
m.startProvider(ctx, p)
}
func (m *Manager) startProvider(ctx context.Context, p *Provider) {
- level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
+ m.logger.Debug("Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
ctx, cancel := context.WithCancel(ctx)
updates := make(chan []*targetgroup.Group)
+ p.mu.Lock()
p.cancel = cancel
+ p.mu.Unlock()
go p.d.Run(ctx, updates)
go m.updater(ctx, p, updates)
@@ -305,16 +317,20 @@ func (m *Manager) startProvider(ctx context.Context, p *Provider) {
// cleaner cleans resources associated with provider.
func (m *Manager) cleaner(p *Provider) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
m.targetsMtx.Lock()
- p.mu.RLock()
for s := range p.subs {
delete(m.targets, poolKey{s, p.name})
}
- p.mu.RUnlock()
m.targetsMtx.Unlock()
if p.done != nil {
p.done()
}
+
+ // The provider was cleaned up, so mark it as down.
+ p.cancel = nil
}
func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targetgroup.Group) {
@@ -327,7 +343,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ
case tgs, ok := <-updates:
m.metrics.ReceivedUpdates.Inc()
if !ok {
- level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
+ m.logger.Debug("Discoverer channel closed", "provider", p.name)
// Wait for provider cancellation to ensure targets are cleaned up when expected.
<-ctx.Done()
return
@@ -363,7 +379,7 @@ func (m *Manager) sender() {
case m.syncCh <- m.allGroups():
default:
m.metrics.DelayedUpdates.Inc()
- level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
+ m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
default:
@@ -379,9 +395,11 @@ func (m *Manager) cancelDiscoverers() {
m.mtx.RLock()
defer m.mtx.RUnlock()
for _, p := range m.providers {
+ p.mu.RLock()
if p.cancel != nil {
p.cancel()
}
+ p.mu.RUnlock()
}
}
@@ -393,8 +411,16 @@ func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) {
m.targets[poolKey] = make(map[string]*targetgroup.Group)
}
for _, tg := range tgs {
- if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics.
+ // Some Discoverers send a nil target group, so we need to check for it to avoid panics.
+ if tg == nil {
+ continue
+ }
+ if len(tg.Targets) > 0 {
m.targets[poolKey][tg.Source] = tg
+ } else {
+ // The target group is empty, so drop the corresponding entry to avoid leaks.
+ // In case the group yielded targets before, allGroups() will take care of making consumers drop them.
+ delete(m.targets[poolKey], tg.Source)
}
}
}
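
updateGroup now removes the map entry when a group arrives empty instead of keeping an empty group around. A self-contained sketch of that bookkeeping, using only the Group fields shown in this diff:

package main

import (
    "fmt"

    "github.com/prometheus/common/model"

    "github.com/prometheus/prometheus/discovery/targetgroup"
)

func applyUpdate(pool map[string]*targetgroup.Group, tgs []*targetgroup.Group) {
    for _, tg := range tgs {
        if tg == nil {
            continue // Some Discoverers send nil groups.
        }
        if len(tg.Targets) > 0 {
            pool[tg.Source] = tg
            continue
        }
        // Empty group: drop the entry so the map does not keep stale sources.
        delete(pool, tg.Source)
    }
}

func main() {
    pool := map[string]*targetgroup.Group{}
    applyUpdate(pool, []*targetgroup.Group{
        {Source: "g1", Targets: []model.LabelSet{{model.AddressLabel: "foo:9090"}}},
    })
    applyUpdate(pool, []*targetgroup.Group{{Source: "g1"}}) // g1 is now empty
    fmt.Println(len(pool))                                  // 0
}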
@@ -403,19 +429,33 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
tSets := map[string][]*targetgroup.Group{}
n := map[string]int{}
- m.targetsMtx.Lock()
- defer m.targetsMtx.Unlock()
- for pkey, tsets := range m.targets {
- for _, tg := range tsets {
- // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager'
- // to signal that it needs to stop all scrape loops for this target set.
- tSets[pkey.setName] = append(tSets[pkey.setName], tg)
- n[pkey.setName] += len(tg.Targets)
+ m.mtx.RLock()
+ for _, p := range m.providers {
+ p.mu.RLock()
+ m.targetsMtx.Lock()
+ for s := range p.subs {
+ // Send empty lists for subs without any targets to make sure stale targets are dropped by consumers.
+ // See: https://github.com/prometheus/prometheus/issues/12858 for details.
+ if _, ok := tSets[s]; !ok {
+ tSets[s] = []*targetgroup.Group{}
+ n[s] = 0
+ }
+ if tsets, ok := m.targets[poolKey{s, p.name}]; ok {
+ for _, tg := range tsets {
+ tSets[s] = append(tSets[s], tg)
+ n[s] += len(tg.Targets)
+ }
+ }
}
+ m.targetsMtx.Unlock()
+ p.mu.RUnlock()
}
+ m.mtx.RUnlock()
+
for setName, v := range n {
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
+
return tSets
}
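
allGroups now iterates providers and their subs rather than the raw targets map, which is what guarantees that a sub with no remaining targets still appears in the output with an empty slice, letting consumers drop whatever they still hold. A reduced sketch of that shape (plain maps stand in for the provider bookkeeping):

package main

import (
    "fmt"

    "github.com/prometheus/prometheus/discovery/targetgroup"
)

func main() {
    // Subs a provider serves, and the groups currently known per sub.
    subs := []string{"prometheus", "prometheus2"}
    known := map[string][]*targetgroup.Group{
        "prometheus": {{Source: "g1"}},
    }

    tSets := map[string][]*targetgroup.Group{}
    for _, s := range subs {
        if _, ok := tSets[s]; !ok {
            // Always emit the key, even with no groups, so consumers of the
            // sync channel drop whatever they still hold for this set.
            tSets[s] = []*targetgroup.Group{}
        }
        tSets[s] = append(tSets[s], known[s]...)
    }

    fmt.Println(len(tSets["prometheus"]), len(tSets["prometheus2"])) // 1 0
}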
@@ -435,12 +475,12 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int {
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(DiscovererOptions{
- Logger: log.With(m.logger, "discovery", typ, "config", setName),
+ Logger: m.logger.With("discovery", typ, "config", setName),
HTTPClientOptions: m.httpOpts,
Metrics: m.sdMetrics[typ],
})
if err != nil {
- level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
+ m.logger.Error("Cannot create service discovery", "err", err, "type", typ, "config", setName)
failed++
return
}
diff --git a/discovery/manager_test.go b/discovery/manager_test.go
index be07edbdb4..38a93be9f4 100644
--- a/discovery/manager_test.go
+++ b/discovery/manager_test.go
@@ -15,6 +15,7 @@ package discovery
import (
"context"
+ "errors"
"fmt"
"sort"
"strconv"
@@ -22,10 +23,10 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -675,7 +676,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
@@ -694,7 +695,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
for x := 0; x < totalUpdatesCount; x++ {
select {
case <-ctx.Done():
- require.FailNow(t, "%d: no update arrived within the timeout limit", x)
+ t.Fatalf("%d: no update arrived within the timeout limit", x)
case tgs := <-provUpdates:
discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs)
for _, got := range discoveryManager.allGroups() {
@@ -768,12 +769,10 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
}
}
}
- if match != present {
- msg := ""
- if !present {
- msg = "not"
- }
- require.FailNow(t, "%q should %s be present in Targets labels: %q", label, msg, mergedTargets)
+ if present {
+ require.Truef(t, match, "%q must be present in Targets labels: %q", label, mergedTargets)
+ } else {
+ require.Falsef(t, match, "%q must be absent in Targets labels: %q", label, mergedTargets)
}
}
@@ -791,7 +790,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -828,7 +827,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -868,7 +867,7 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -911,7 +910,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -939,11 +938,13 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
discoveryManager.ApplyConfig(c)
// Original targets should be present as soon as possible.
+ // An empty list should be sent for prometheus2 to drop any stale targets.
syncedTargets = <-discoveryManager.SyncCh()
mu.Unlock()
- require.Len(t, syncedTargets, 1)
+ require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
+ require.Empty(t, syncedTargets["prometheus2"])
// prometheus2 configs should be ready on second sync.
syncedTargets = <-discoveryManager.SyncCh()
@@ -977,7 +978,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1021,7 +1022,7 @@ func TestDiscovererConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1049,8 +1050,8 @@ func TestDiscovererConfigs(t *testing.T) {
}
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
-// removing all targets from the static_configs sends an update with empty targetGroups.
-// This is required to signal the receiver that this target set has no current targets.
+// removing all targets from the static_configs cleans up the corresponding targetGroups entries to avoid leaks and sends an empty update.
+// The update is required to signal the consumers that the previous targets should be dropped.
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@@ -1058,7 +1059,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1083,16 +1084,14 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
+ require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
targetGroups, ok := discoveryManager.targets[p]
- require.True(t, ok, "'%v' should be present in target groups", p)
- group, ok := targetGroups[""]
- require.True(t, ok, "missing '' key in target groups %v", targetGroups)
-
- require.Empty(t, group.Targets, "Invalid number of targets.")
- require.Len(t, syncedTargets, 1)
- require.Len(t, syncedTargets["prometheus"], 1)
- require.Nil(t, syncedTargets["prometheus"][0].Labels)
+ require.True(t, ok, "'%v' should be present in targets", p)
+ // Otherwise the targetGroups will leak; see https://github.com/prometheus/prometheus/issues/12436.
+ require.Empty(t, targetGroups, "'%v' should no longer have any associated target groups", p)
+ require.Len(t, syncedTargets, 1, "an update with no targetGroups should still be sent.")
+ require.Empty(t, syncedTargets["prometheus"])
}
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
@@ -1141,7 +1140,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1202,16 +1201,16 @@ func TestGaugeFailedConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
- errorConfig{fmt.Errorf("tests error 0")},
- errorConfig{fmt.Errorf("tests error 1")},
- errorConfig{fmt.Errorf("tests error 2")},
+ errorConfig{errors.New("tests error 0")},
+ errorConfig{errors.New("tests error 1")},
+ errorConfig{errors.New("tests error 2")},
},
}
discoveryManager.ApplyConfig(c)
@@ -1275,6 +1274,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
+ "mock1": {},
},
},
{
@@ -1371,10 +1371,10 @@ func TestCoordinationWithReceiver(t *testing.T) {
time.Sleep(expected.delay)
select {
case <-ctx.Done():
- require.FailNow(t, "step %d: no update received in the expected timeframe", i)
+ t.Fatalf("step %d: no update received in the expected timeframe", i)
case tgs, ok := <-mgr.SyncCh():
require.True(t, ok, "step %d: discovery manager channel is closed", i)
- require.Equal(t, len(expected.tgs), len(tgs), "step %d: targets mismatch", i)
+ require.Len(t, tgs, len(expected.tgs), "step %d: targets mismatch", i)
for k := range expected.tgs {
_, ok := tgs[k]
@@ -1453,7 +1453,7 @@ func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1550,7 +1550,7 @@ func TestUnregisterMetrics(t *testing.T) {
refreshMetrics, sdMetrics := NewTestMetrics(t, reg)
- discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
+ discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
// discoveryManager will be nil if there was an error configuring metrics.
require.NotNil(t, discoveryManager)
// Unregister all metrics.
@@ -1562,3 +1562,53 @@ func TestUnregisterMetrics(t *testing.T) {
cancel()
}
}
+
+// TestConfigReloadAndShutdownRace ensures that calling ApplyConfig() to remove providers
+// while the manager is shutting down does not hang.
+func TestConfigReloadAndShutdownRace(t *testing.T) {
+ reg := prometheus.NewRegistry()
+ _, sdMetrics := NewTestMetrics(t, reg)
+
+ mgrCtx, mgrCancel := context.WithCancel(context.Background())
+ discoveryManager := NewManager(mgrCtx, promslog.NewNopLogger(), reg, sdMetrics)
+ require.NotNil(t, discoveryManager)
+ discoveryManager.updatert = 100 * time.Millisecond
+
+ var wgDiscovery sync.WaitGroup
+ wgDiscovery.Add(1)
+ go func() {
+ discoveryManager.Run()
+ wgDiscovery.Done()
+ }()
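+ // Give the manager's run loop time to start.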
+ time.Sleep(time.Millisecond * 200)
+
+ var wgBg sync.WaitGroup
+ updateChan := discoveryManager.SyncCh()
+ wgBg.Add(1)
+ ctx, cancel := context.WithCancel(context.Background())
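+ // Drain a single update from the sync channel, or stop once the test context is cancelled.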
+ go func() {
+ defer wgBg.Done()
+ select {
+ case <-ctx.Done():
+ return
+ case <-updateChan:
+ }
+ }()
+
+ c := map[string]Configs{
+ "prometheus": {staticConfig("bar:9090")},
+ }
+ discoveryManager.ApplyConfig(c)
+
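+ // Re-apply the configuration with the provider removed while the manager shuts down to exercise the race.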
+ delete(c, "prometheus")
+ wgBg.Add(1)
+ go func() {
+ discoveryManager.ApplyConfig(c)
+ wgBg.Done()
+ }()
+ mgrCancel()
+ wgDiscovery.Wait()
+
+ cancel()
+ wgBg.Wait()
+}
diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go
index 38b47accff..0c2c2e9702 100644
--- a/discovery/marathon/marathon.go
+++ b/discovery/marathon/marathon.go
@@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"math/rand"
"net"
"net/http"
@@ -27,7 +28,6 @@ import (
"strings"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -80,7 +80,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &marathonMetrics{
refreshMetrics: rmi,
}
@@ -140,10 +140,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Marathon Discovery.
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*marathonMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd")
diff --git a/discovery/marathon/marathon_test.go b/discovery/marathon/marathon_test.go
index 659899f163..18ec7bdf19 100644
--- a/discovery/marathon/marathon_test.go
+++ b/discovery/marathon/marathon_test.go
@@ -202,7 +202,7 @@ func TestMarathonSDSendGroupWithMultiplePort(t *testing.T) {
tgt = tg.Targets[1]
require.Equal(t, "mesos-slave1:32000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]),
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]),
"Wrong portMappings label from the second port: %s", tgt[model.AddressLabel])
}
@@ -243,7 +243,7 @@ func TestMarathonZeroTaskPorts(t *testing.T) {
func Test500ErrorHttpResponseWithValidJSONBody(t *testing.T) {
// Simulate 500 error with a valid JSON response.
- respHandler := func(w http.ResponseWriter, r *http.Request) {
+ respHandler := func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Header().Set("Content-Type", "application/json")
io.WriteString(w, `{}`)
@@ -300,9 +300,9 @@ func TestMarathonSDSendGroupWithPortDefinitions(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "mesos-slave1:1234", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]),
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]),
"Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]),
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]),
"Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
@@ -354,12 +354,12 @@ func TestMarathonSDSendGroupWithPortDefinitionsRequirePorts(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "mesos-slave1:31000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
require.Equal(t, "mesos-slave1:32000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
require.Equal(t, "yes", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
}
@@ -401,13 +401,13 @@ func TestMarathonSDSendGroupWithPorts(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "mesos-slave1:31000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
require.Equal(t, "mesos-slave1:32000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
}
func marathonTestAppListWithContainerPortMappings(labels map[string]string, runningTasks int) *appList {
@@ -458,12 +458,12 @@ func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "mesos-slave1:12345", string(tgt[model.AddressLabel]), "Wrong target address.")
require.Equal(t, "yes", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
require.Equal(t, "mesos-slave1:32000", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
}
func marathonTestAppListWithDockerContainerPortMappings(labels map[string]string, runningTasks int) *appList {
@@ -514,12 +514,12 @@ func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "mesos-slave1:31000", string(tgt[model.AddressLabel]), "Wrong target address.")
require.Equal(t, "yes", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
require.Equal(t, "mesos-slave1:12345", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
}
func marathonTestAppListWithContainerNetworkAndPortMappings(labels map[string]string, runningTasks int) *appList {
@@ -574,10 +574,10 @@ func TestMarathonSDSendGroupWithContainerNetworkAndPortMapping(t *testing.T) {
tgt := tg.Targets[0]
require.Equal(t, "1.2.3.4:8080", string(tgt[model.AddressLabel]), "Wrong target address.")
require.Equal(t, "yes", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the first port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the first port.")
tgt = tg.Targets[1]
require.Equal(t, "1.2.3.4:1234", string(tgt[model.AddressLabel]), "Wrong target address.")
- require.Equal(t, "", string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
- require.Equal(t, "", string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portMappingLabelPrefix+"prometheus")]), "Wrong portMappings label from the second port.")
+ require.Empty(t, string(tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")]), "Wrong portDefinitions label from the second port.")
}
diff --git a/discovery/metrics_refresh.go b/discovery/metrics_refresh.go
index d621165ced..ef49e591a3 100644
--- a/discovery/metrics_refresh.go
+++ b/discovery/metrics_refresh.go
@@ -17,7 +17,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
-// Metric vectors for the "refresh" package.
+// RefreshMetricsVecs are metric vectors for the "refresh" package.
// We define them here in the "discovery" package in order to avoid a cyclic dependency between
// "discovery" and "refresh".
type RefreshMetricsVecs struct {
diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go
index 11445092ee..2b640dea82 100644
--- a/discovery/moby/docker.go
+++ b/discovery/moby/docker.go
@@ -15,22 +15,24 @@ package moby
import (
"context"
+ "errors"
"fmt"
+ "log/slog"
"net"
"net/http"
"net/url"
+ "sort"
"strconv"
"time"
- "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/client"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@@ -109,7 +111,7 @@ func (c *DockerSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error
return err
}
if c.Host == "" {
- return fmt.Errorf("host missing")
+ return errors.New("host missing")
}
if _, err = url.Parse(c.Host); err != nil {
return err
@@ -127,10 +129,10 @@ type DockerDiscovery struct {
}
// NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets.
-func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) {
+func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) {
m, ok := metrics.(*dockerMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &DockerDiscovery{
@@ -171,7 +173,7 @@ func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discove
}),
client.WithScheme(hostURL.Scheme),
client.WithHTTPHeaders(map[string]string{
- "User-Agent": userAgent,
+ "User-Agent": version.PrometheusUserAgent(),
}),
)
}
@@ -208,7 +210,7 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
return nil, fmt.Errorf("error while computing network labels: %w", err)
}
- allContainers := make(map[string]types.Container)
+ allContainers := make(map[string]container.Summary)
for _, c := range containers {
allContainers[c.ID] = c
}
@@ -233,46 +235,40 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
containerNetworkMode := container.NetworkMode(c.HostConfig.NetworkMode)
if len(networks) == 0 {
// Try to lookup shared networks
- for {
- if containerNetworkMode.IsContainer() {
- tmpContainer, exists := allContainers[containerNetworkMode.ConnectedContainer()]
- if !exists {
- break
- }
- networks = tmpContainer.NetworkSettings.Networks
- containerNetworkMode = container.NetworkMode(tmpContainer.HostConfig.NetworkMode)
- if len(networks) > 0 {
- break
- }
- } else {
+ for containerNetworkMode.IsContainer() {
+ tmpContainer, exists := allContainers[containerNetworkMode.ConnectedContainer()]
+ if !exists {
+ break
+ }
+ networks = tmpContainer.NetworkSettings.Networks
+ containerNetworkMode = container.NetworkMode(tmpContainer.HostConfig.NetworkMode)
+ if len(networks) > 0 {
break
}
}
}
if d.matchFirstNetwork && len(networks) > 1 {
- // Match user defined network
- if containerNetworkMode.IsUserDefined() {
- networkMode := string(containerNetworkMode)
- networks = map[string]*network.EndpointSettings{networkMode: networks[networkMode]}
- } else {
- // Get first network if container network mode has "none" value.
- // This case appears under certain condition:
- // 1. Container created with network set to "--net=none".
- // 2. Disconnect network "none".
- // 3. Reconnect network with user defined networks.
- var first string
- for k, n := range networks {
- if n != nil {
- first = k
- break
- }
+ // Sort the networks by name and take the first non-nil network.
+ keys := make([]string, 0, len(networks))
+ for k, n := range networks {
+ if n != nil {
+ keys = append(keys, k)
}
- networks = map[string]*network.EndpointSettings{first: networks[first]}
+ }
+ if len(keys) > 0 {
+ sort.Strings(keys)
+ firstNetworkMode := keys[0]
+ firstNetwork := networks[firstNetworkMode]
+ networks = map[string]*network.EndpointSettings{firstNetworkMode: firstNetwork}
}
}
for _, n := range networks {
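+ // Skip nil network endpoints.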
+ if n == nil {
+ continue
+ }
+
var added bool
for _, p := range c.Ports {
diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go
index c108ddf582..00e6a3e4f3 100644
--- a/discovery/moby/docker_test.go
+++ b/discovery/moby/docker_test.go
@@ -19,9 +19,9 @@ import (
"sort"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -60,9 +60,9 @@ host: %s
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Len(t, tg.Targets, 6)
+ require.Len(t, tg.Targets, 8)
- for i, lbls := range []model.LabelSet{
+ expected := []model.LabelSet{
{
"__address__": "172.19.0.2:9100",
"__meta_docker_container_id": "c301b928faceb1a18fe379f6bc178727ef920bb30b0f9b8592b32b36255a0eca",
@@ -163,7 +163,43 @@ host: %s
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "9104",
},
- } {
+ {
+ "__address__": "172.20.0.3:3306",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.20.0.3",
+ "__meta_docker_network_name": "dockersd_private",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "3306",
+ },
+ {
+ "__address__": "172.20.0.3:33060",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.20.0.3",
+ "__meta_docker_network_name": "dockersd_private",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "33060",
+ },
+ }
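+ // Sort both expected and discovered targets by __address__ so the comparison is order-independent.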
+ sortFunc(expected)
+ sortFunc(tg.Targets)
+
+ for i, lbls := range expected {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
require.Equal(t, lbls, tg.Targets[i])
})
@@ -190,7 +226,7 @@ host: %s
require.NoError(t, metrics.Register())
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -202,13 +238,8 @@ host: %s
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Len(t, tg.Targets, 9)
+ require.Len(t, tg.Targets, 13)
- sortFunc := func(labelSets []model.LabelSet) {
- sort.Slice(labelSets, func(i, j int) bool {
- return labelSets[i]["__address__"] < labelSets[j]["__address__"]
- })
- }
expected := []model.LabelSet{
{
"__address__": "172.19.0.2:9100",
@@ -359,6 +390,70 @@ host: %s
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "9104",
},
+ {
+ "__address__": "172.20.0.3:3306",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.20.0.3",
+ "__meta_docker_network_name": "dockersd_private",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "3306",
+ },
+ {
+ "__address__": "172.20.0.3:33060",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.20.0.3",
+ "__meta_docker_network_name": "dockersd_private",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "33060",
+ },
+ {
+ "__address__": "172.21.0.3:3306",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.21.0.3",
+ "__meta_docker_network_name": "dockersd_private1",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "3306",
+ },
+ {
+ "__address__": "172.21.0.3:33060",
+ "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "__meta_docker_container_label_com_docker_compose_project": "dockersd",
+ "__meta_docker_container_label_com_docker_compose_service": "mysql",
+ "__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+ "__meta_docker_container_name": "/dockersd_multi_networks",
+ "__meta_docker_container_network_mode": "dockersd_private_none",
+ "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+ "__meta_docker_network_ingress": "false",
+ "__meta_docker_network_internal": "false",
+ "__meta_docker_network_ip": "172.21.0.3",
+ "__meta_docker_network_name": "dockersd_private1",
+ "__meta_docker_network_scope": "local",
+ "__meta_docker_port_private": "33060",
+ },
}
sortFunc(expected)
@@ -370,3 +465,9 @@ host: %s
})
}
}
+
+func sortFunc(labelSets []model.LabelSet) {
+ sort.Slice(labelSets, func(i, j int) bool {
+ return labelSets[i]["__address__"] < labelSets[j]["__address__"]
+ })
+}
diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go
index b0147467d2..57c0af7171 100644
--- a/discovery/moby/dockerswarm.go
+++ b/discovery/moby/dockerswarm.go
@@ -15,14 +15,15 @@ package moby
import (
"context"
+ "errors"
"fmt"
+ "log/slog"
"net/http"
"net/url"
"time"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -37,8 +38,6 @@ const (
swarmLabel = model.MetaLabelPrefix + "dockerswarm_"
)
-var userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
-
// DefaultDockerSwarmSDConfig is the default Docker Swarm SD configuration.
var DefaultDockerSwarmSDConfig = DockerSwarmSDConfig{
RefreshInterval: model.Duration(60 * time.Second),
@@ -71,7 +70,7 @@ type Filter struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*DockerSwarmSDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*DockerSwarmSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &dockerswarmMetrics{
refreshMetrics: rmi,
}
@@ -99,7 +98,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e
return err
}
if c.Host == "" {
- return fmt.Errorf("host missing")
+ return errors.New("host missing")
}
if _, err = url.Parse(c.Host); err != nil {
return err
@@ -107,7 +106,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e
switch c.Role {
case "services", "nodes", "tasks":
case "":
- return fmt.Errorf("role missing (one of: tasks, services, nodes)")
+ return errors.New("role missing (one of: tasks, services, nodes)")
default:
return fmt.Errorf("invalid role %s, expected tasks, services, or nodes", c.Role)
}
@@ -125,10 +124,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*dockerswarmMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
@@ -168,7 +167,7 @@ func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discover
}),
client.WithScheme(hostURL.Scheme),
client.WithHTTPHeaders(map[string]string{
- "User-Agent": userAgent,
+ "User-Agent": version.PrometheusUserAgent(),
}),
)
}
diff --git a/discovery/moby/mock_test.go b/discovery/moby/mock_test.go
index 3f35258c8f..7ef5cb07c3 100644
--- a/discovery/moby/mock_test.go
+++ b/discovery/moby/mock_test.go
@@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() {
if len(query) == 2 {
h := sha1.New()
h.Write([]byte(query[1]))
- // Avoing long filenames for Windows.
+ // Avoiding long filenames for Windows.
f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10]
}
}
diff --git a/discovery/moby/network.go b/discovery/moby/network.go
index 794d2e607d..ea1ca66bc7 100644
--- a/discovery/moby/network.go
+++ b/discovery/moby/network.go
@@ -17,7 +17,7 @@ import (
"context"
"strconv"
- "github.com/docker/docker/api/types"
+ "github.com/docker/docker/api/types/network"
"github.com/docker/docker/client"
"github.com/prometheus/prometheus/util/strutil"
@@ -34,7 +34,7 @@ const (
)
func getNetworksLabels(ctx context.Context, client *client.Client, labelPrefix string) (map[string]map[string]string, error) {
- networks, err := client.NetworkList(ctx, types.NetworkListOptions{})
+ networks, err := client.NetworkList(ctx, network.ListOptions{})
if err != nil {
return nil, err
}
diff --git a/discovery/moby/nodes.go b/discovery/moby/nodes.go
index b5be844eda..a11afeee25 100644
--- a/discovery/moby/nodes.go
+++ b/discovery/moby/nodes.go
@@ -19,7 +19,7 @@ import (
"net"
"strconv"
- "github.com/docker/docker/api/types"
+ "github.com/docker/docker/api/types/swarm"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -48,7 +48,7 @@ func (d *Discovery) refreshNodes(ctx context.Context) ([]*targetgroup.Group, err
Source: "DockerSwarm",
}
- nodes, err := d.client.NodeList(ctx, types.NodeListOptions{Filters: d.filters})
+ nodes, err := d.client.NodeList(ctx, swarm.NodeListOptions{Filters: d.filters})
if err != nil {
return nil, fmt.Errorf("error while listing swarm nodes: %w", err)
}
@@ -85,7 +85,7 @@ func (d *Discovery) refreshNodes(ctx context.Context) ([]*targetgroup.Group, err
}
func (d *Discovery) getNodesLabels(ctx context.Context) (map[string]map[string]string, error) {
- nodes, err := d.client.NodeList(ctx, types.NodeListOptions{})
+ nodes, err := d.client.NodeList(ctx, swarm.NodeListOptions{})
if err != nil {
return nil, fmt.Errorf("error while listing swarm nodes: %w", err)
}
diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go
index 4ad1088d1a..973b83c4b6 100644
--- a/discovery/moby/nodes_test.go
+++ b/discovery/moby/nodes_test.go
@@ -18,9 +18,9 @@ import (
"fmt"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
diff --git a/discovery/moby/services.go b/discovery/moby/services.go
index c61b499259..0698c01e6a 100644
--- a/discovery/moby/services.go
+++ b/discovery/moby/services.go
@@ -19,7 +19,6 @@ import (
"net"
"strconv"
- "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/swarm"
"github.com/prometheus/common/model"
@@ -46,7 +45,7 @@ func (d *Discovery) refreshServices(ctx context.Context) ([]*targetgroup.Group,
Source: "DockerSwarm",
}
- services, err := d.client.ServiceList(ctx, types.ServiceListOptions{Filters: d.filters})
+ services, err := d.client.ServiceList(ctx, swarm.ServiceListOptions{Filters: d.filters})
if err != nil {
return nil, fmt.Errorf("error while listing swarm services: %w", err)
}
@@ -127,7 +126,7 @@ func (d *Discovery) refreshServices(ctx context.Context) ([]*targetgroup.Group,
}
func (d *Discovery) getServicesLabelsAndPorts(ctx context.Context) (map[string]map[string]string, map[string][]swarm.PortConfig, error) {
- services, err := d.client.ServiceList(ctx, types.ServiceListOptions{})
+ services, err := d.client.ServiceList(ctx, swarm.ServiceListOptions{})
if err != nil {
return nil, nil, err
}
diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go
index 47ca69e33a..7a966cfeee 100644
--- a/discovery/moby/services_test.go
+++ b/discovery/moby/services_test.go
@@ -18,9 +18,9 @@ import (
"fmt"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -349,7 +349,7 @@ filters:
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
diff --git a/discovery/moby/tasks.go b/discovery/moby/tasks.go
index 38b9d33de2..46e8a06d01 100644
--- a/discovery/moby/tasks.go
+++ b/discovery/moby/tasks.go
@@ -19,7 +19,6 @@ import (
"net"
"strconv"
- "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/swarm"
"github.com/prometheus/common/model"
@@ -43,7 +42,7 @@ func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, err
Source: "DockerSwarm",
}
- tasks, err := d.client.TaskList(ctx, types.TaskListOptions{Filters: d.filters})
+ tasks, err := d.client.TaskList(ctx, swarm.TaskListOptions{Filters: d.filters})
if err != nil {
return nil, fmt.Errorf("error while listing swarm services: %w", err)
}
diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go
index ef71bc02f5..59d8831c3b 100644
--- a/discovery/moby/tasks_test.go
+++ b/discovery/moby/tasks_test.go
@@ -18,9 +18,9 @@ import (
"fmt"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
diff --git a/discovery/moby/testdata/dockerprom/containers/json.json b/discovery/moby/testdata/dockerprom/containers/json.json
index ebfc56b6d5..33406bf9a4 100644
--- a/discovery/moby/testdata/dockerprom/containers/json.json
+++ b/discovery/moby/testdata/dockerprom/containers/json.json
@@ -228,5 +228,74 @@
"Networks": {}
},
"Mounts": []
+ },
+ {
+ "Id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+ "Names": [
+ "/dockersd_multi_networks"
+ ],
+ "Image": "mysql:5.7.29",
+ "ImageID": "sha256:16ae2f4625ba63a250462bedeece422e741de9f0caf3b1d89fd5b257aca80cd1",
+ "Command": "mysqld",
+ "Created": 1616273136,
+ "Ports": [
+ {
+ "PrivatePort": 3306,
+ "Type": "tcp"
+ },
+ {
+ "PrivatePort": 33060,
+ "Type": "tcp"
+ }
+ ],
+ "Labels": {
+ "com.docker.compose.project": "dockersd",
+ "com.docker.compose.service": "mysql",
+ "com.docker.compose.version": "2.2.2"
+ },
+ "State": "running",
+ "Status": "Up 40 seconds",
+ "HostConfig": {
+ "NetworkMode": "dockersd_private_none"
+ },
+ "NetworkSettings": {
+ "Networks": {
+ "dockersd_private": {
+ "IPAMConfig": null,
+ "Links": null,
+ "Aliases": null,
+ "NetworkID": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+ "EndpointID": "972d6807997369605ace863af58de6cb90c787a5bf2ffc4105662d393ae539b7",
+ "Gateway": "172.20.0.1",
+ "IPAddress": "172.20.0.3",
+ "IPPrefixLen": 16,
+ "IPv6Gateway": "",
+ "GlobalIPv6Address": "",
+ "GlobalIPv6PrefixLen": 0,
+ "MacAddress": "02:42:ac:14:00:02",
+ "DriverOpts": null
+ },
+ "dockersd_private1": {
+ "IPAMConfig": {},
+ "Links": null,
+ "Aliases": [
+ "mysql",
+ "mysql",
+ "f9ade4b83199"
+ ],
+ "NetworkID": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+ "EndpointID": "91a98405344ee1cb7d977cafabe634837876651544b32da20a5e0155868e6f5f",
+ "Gateway": "172.21.0.1",
+ "IPAddress": "172.21.0.3",
+ "IPPrefixLen": 24,
+ "IPv6Gateway": "",
+ "GlobalIPv6Address": "",
+ "GlobalIPv6PrefixLen": 0,
+ "MacAddress": "02:42:ac:15:00:02",
+ "DriverOpts": null
+ }
+ }
+ },
+ "Mounts": []
}
]
diff --git a/discovery/moby/testdata/swarmprom/services.json b/discovery/moby/testdata/swarmprom/services.json
index 72caa7a7f8..8f6c0793dd 100644
--- a/discovery/moby/testdata/swarmprom/services.json
+++ b/discovery/moby/testdata/swarmprom/services.json
@@ -224,7 +224,7 @@
"Args": [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
- "--storage.tsdb.retention=24h"
+ "--storage.tsdb.retention.time=24h"
],
"Privileges": {
"CredentialSpec": null,
diff --git a/discovery/moby/testdata/swarmprom/tasks.json b/discovery/moby/testdata/swarmprom/tasks.json
index 33d81f25ce..af5ff9fe28 100644
--- a/discovery/moby/testdata/swarmprom/tasks.json
+++ b/discovery/moby/testdata/swarmprom/tasks.json
@@ -973,7 +973,7 @@
"Args": [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
- "--storage.tsdb.retention=24h"
+ "--storage.tsdb.retention.time=24h"
],
"Privileges": {
"CredentialSpec": null,
diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go
index d9c48120ae..7516308026 100644
--- a/discovery/nomad/nomad.go
+++ b/discovery/nomad/nomad.go
@@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
nomad "github.com/hashicorp/nomad/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -121,10 +121,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*nomadMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go
index 357d4a8e9b..a73b45785d 100644
--- a/discovery/nomad/nomad_test.go
+++ b/discovery/nomad/nomad_test.go
@@ -21,9 +21,9 @@ import (
"net/url"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -76,7 +76,7 @@ func (s *NomadSDTestSuite) SetupTest(t *testing.T) {
}
func (m *SDMock) HandleServicesList() {
- m.Mux.HandleFunc("/v1/services", func(w http.ResponseWriter, r *http.Request) {
+ m.Mux.HandleFunc("/v1/services", func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("content-type", "application/json; charset=utf-8")
w.WriteHeader(http.StatusOK)
@@ -99,7 +99,7 @@ func (m *SDMock) HandleServicesList() {
}
func (m *SDMock) HandleServiceHashiCupsGet() {
- m.Mux.HandleFunc("/v1/service/hashicups", func(w http.ResponseWriter, r *http.Request) {
+ m.Mux.HandleFunc("/v1/service/hashicups", func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("content-type", "application/json; charset=utf-8")
w.WriteHeader(http.StatusOK)
@@ -127,19 +127,37 @@ func (m *SDMock) HandleServiceHashiCupsGet() {
}
func TestConfiguredService(t *testing.T) {
- conf := &SDConfig{
- Server: "http://localhost:4646",
+ testCases := []struct {
+ name string
+ server string
+ acceptedURL bool
+ }{
+ {"invalid hostname URL", "http://foo.bar:4646", true},
+ {"invalid even though accepted by parsing", "foo.bar:4646", true},
+ {"valid address URL", "http://172.30.29.23:4646", true},
+ {"invalid URL", "172.30.29.23:4646", false},
}
- reg := prometheus.NewRegistry()
- refreshMetrics := discovery.NewRefreshMetrics(reg)
- metrics := conf.NewDiscovererMetrics(reg, refreshMetrics)
- require.NoError(t, metrics.Register())
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ conf := &SDConfig{
+ Server: tc.server,
+ }
- _, err := NewDiscovery(conf, nil, metrics)
- require.NoError(t, err)
+ reg := prometheus.NewRegistry()
+ refreshMetrics := discovery.NewRefreshMetrics(reg)
+ metrics := conf.NewDiscovererMetrics(reg, refreshMetrics)
+ require.NoError(t, metrics.Register())
+ defer metrics.Unregister()
- metrics.Unregister()
+ _, err := NewDiscovery(conf, nil, metrics)
+ if tc.acceptedURL {
+ require.NoError(t, err)
+ } else {
+ require.Error(t, err)
+ }
+ })
+ }
}
func TestNomadSDRefresh(t *testing.T) {
@@ -160,7 +178,7 @@ func TestNomadSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
tgs, err := d.refresh(context.Background())
diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go
index 8964da9294..e7a6362052 100644
--- a/discovery/openstack/hypervisor.go
+++ b/discovery/openstack/hypervisor.go
@@ -16,14 +16,14 @@ package openstack
import (
"context"
"fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/gophercloud/gophercloud"
- "github.com/gophercloud/gophercloud/openstack"
- "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors"
- "github.com/gophercloud/gophercloud/pagination"
+ "github.com/gophercloud/gophercloud/v2"
+ "github.com/gophercloud/gophercloud/v2/openstack"
+ "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors"
+ "github.com/gophercloud/gophercloud/v2/pagination"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -43,14 +43,14 @@ type HypervisorDiscovery struct {
provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions
region string
- logger log.Logger
+ logger *slog.Logger
port int
availability gophercloud.Availability
}
// newHypervisorDiscovery returns a new hypervisor discovery.
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
- port int, region string, availability gophercloud.Availability, l log.Logger,
+ port int, region string, availability gophercloud.Availability, l *slog.Logger,
) *HypervisorDiscovery {
return &HypervisorDiscovery{
provider: provider, authOpts: opts,
@@ -59,8 +59,7 @@ func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercl
}
func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
- h.provider.Context = ctx
- err := openstack.Authenticate(h.provider, *h.authOpts)
+ err := openstack.Authenticate(ctx, h.provider, *h.authOpts)
if err != nil {
return nil, fmt.Errorf("could not authenticate to OpenStack: %w", err)
}
@@ -78,7 +77,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group
// OpenStack API reference
// https://developer.openstack.org/api-ref/compute/#list-hypervisors-details
pagerHypervisors := hypervisors.List(client, nil)
- err = pagerHypervisors.EachPage(func(page pagination.Page) (bool, error) {
+ err = pagerHypervisors.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) {
hypervisorList, err := hypervisors.ExtractHypervisors(page)
if err != nil {
return false, fmt.Errorf("could not extract hypervisors: %w", err)
diff --git a/discovery/openstack/hypervisor_test.go b/discovery/openstack/hypervisor_test.go
index 45684b4a2e..e4a97f32cf 100644
--- a/discovery/openstack/hypervisor_test.go
+++ b/discovery/openstack/hypervisor_test.go
@@ -93,6 +93,5 @@ func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
- require.Error(t, err)
- require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
+ require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
}
diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go
index 78c669e6f7..6c2f79b3a4 100644
--- a/discovery/openstack/instance.go
+++ b/discovery/openstack/instance.go
@@ -16,17 +16,18 @@ package openstack
import (
"context"
"fmt"
+ "log/slog"
"net"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
- "github.com/gophercloud/gophercloud"
- "github.com/gophercloud/gophercloud/openstack"
- "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips"
- "github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
- "github.com/gophercloud/gophercloud/pagination"
+ "github.com/gophercloud/gophercloud/v2"
+ "github.com/gophercloud/gophercloud/v2/openstack"
+ "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
+ "github.com/gophercloud/gophercloud/v2/openstack/networking/v2/extensions/layer3/floatingips"
+ "github.com/gophercloud/gophercloud/v2/openstack/networking/v2/ports"
+ "github.com/gophercloud/gophercloud/v2/pagination"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
@@ -52,7 +53,7 @@ type InstanceDiscovery struct {
provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions
region string
- logger log.Logger
+ logger *slog.Logger
port int
allTenants bool
availability gophercloud.Availability
@@ -60,10 +61,10 @@ type InstanceDiscovery struct {
// NewInstanceDiscovery returns a new instance discovery.
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
- port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger,
+ port int, region string, allTenants bool, availability gophercloud.Availability, l *slog.Logger,
) *InstanceDiscovery {
if l == nil {
- l = log.NewNopLogger()
+ l = promslog.NewNopLogger()
}
return &InstanceDiscovery{
provider: provider, authOpts: opts,
@@ -72,13 +73,12 @@ func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gopherclou
}
type floatingIPKey struct {
- id string
- fixed string
+ deviceID string
+ fixed string
}
func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
- i.provider.Context = ctx
- err := openstack.Authenticate(i.provider, *i.authOpts)
+ err := openstack.Authenticate(ctx, i.provider, *i.authOpts)
if err != nil {
return nil, fmt.Errorf("could not authenticate to OpenStack: %w", err)
}
@@ -90,23 +90,60 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
return nil, fmt.Errorf("could not create OpenStack compute session: %w", err)
}
+ networkClient, err := openstack.NewNetworkV2(i.provider, gophercloud.EndpointOpts{
+ Region: i.region, Availability: i.availability,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("could not create OpenStack network session: %w", err)
+ }
+
// OpenStack API reference
- // https://developer.openstack.org/api-ref/compute/#list-floating-ips
- pagerFIP := floatingips.List(client)
+ // https://docs.openstack.org/api-ref/network/v2/index.html#list-ports
+ portPages, err := ports.List(networkClient, ports.ListOpts{}).AllPages(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("failed to list all ports: %w", err)
+ }
+
+ allPorts, err := ports.ExtractPorts(portPages)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract Ports: %w", err)
+ }
+
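+ // Map each port ID to the ID of the device it is attached to, so floating IPs can be matched to instances.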
+ portList := make(map[string]string)
+ for _, port := range allPorts {
+ portList[port.ID] = port.DeviceID
+ }
+
+ // OpenStack API reference
+ // https://docs.openstack.org/api-ref/network/v2/index.html#list-floating-ips
+ pagerFIP := floatingips.List(networkClient, floatingips.ListOpts{})
floatingIPList := make(map[floatingIPKey]string)
floatingIPPresent := make(map[string]struct{})
- err = pagerFIP.EachPage(func(page pagination.Page) (bool, error) {
+ err = pagerFIP.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) {
result, err := floatingips.ExtractFloatingIPs(page)
if err != nil {
return false, fmt.Errorf("could not extract floatingips: %w", err)
}
for _, ip := range result {
// Skip not associated ips
- if ip.InstanceID == "" || ip.FixedIP == "" {
+ if ip.PortID == "" || ip.FixedIP == "" {
continue
}
- floatingIPList[floatingIPKey{id: ip.InstanceID, fixed: ip.FixedIP}] = ip.IP
- floatingIPPresent[ip.IP] = struct{}{}
+
+ // Fetch deviceID from portList
+ deviceID, ok := portList[ip.PortID]
+ if !ok {
+ i.logger.Warn("Floating IP PortID not found in portList", "PortID", ip.PortID)
+ continue
+ }
+
+ key := floatingIPKey{
+ deviceID: deviceID,
+ fixed: ip.FixedIP,
+ }
+
+ floatingIPList[key] = ip.FloatingIP
+ floatingIPPresent[ip.FloatingIP] = struct{}{}
}
return true, nil
})
@@ -123,7 +160,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
tg := &targetgroup.Group{
Source: "OS_" + i.region,
}
- err = pager.EachPage(func(page pagination.Page) (bool, error) {
+ err = pager.EachPage(ctx, func(ctx context.Context, page pagination.Page) (bool, error) {
if ctx.Err() != nil {
return false, fmt.Errorf("could not extract instances: %w", ctx.Err())
}
@@ -134,7 +171,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
for _, s := range instanceList {
if len(s.Addresses) == 0 {
- level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID)
+ i.logger.Info("Got no IP address", "instance", s.ID)
continue
}
@@ -151,7 +188,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
if !nameOk {
flavorID, idOk := s.Flavor["id"].(string)
if !idOk {
- level.Warn(i.logger).Log("msg", "Invalid type for both flavor original_name and flavor id, expected string")
+ i.logger.Warn("Invalid type for both flavor original_name and flavor id, expected string")
continue
}
labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID)
@@ -171,22 +208,22 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
for pool, address := range s.Addresses {
md, ok := address.([]interface{})
if !ok {
- level.Warn(i.logger).Log("msg", "Invalid type for address, expected array")
+ i.logger.Warn("Invalid type for address, expected array")
continue
}
if len(md) == 0 {
- level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID)
+ i.logger.Debug("Got no IP address", "instance", s.ID)
continue
}
for _, address := range md {
md1, ok := address.(map[string]interface{})
if !ok {
- level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict")
+ i.logger.Warn("Invalid type for address, expected dict")
continue
}
addr, ok := md1["addr"].(string)
if !ok {
- level.Warn(i.logger).Log("msg", "Invalid type for address, expected string")
+ i.logger.Warn("Invalid type for address, expected string")
continue
}
if _, ok := floatingIPPresent[addr]; ok {
@@ -198,7 +235,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
}
lbls[openstackLabelAddressPool] = model.LabelValue(pool)
lbls[openstackLabelPrivateIP] = model.LabelValue(addr)
- if val, ok := floatingIPList[floatingIPKey{id: s.ID, fixed: addr}]; ok {
+ if val, ok := floatingIPList[floatingIPKey{deviceID: s.ID, fixed: addr}]; ok {
lbls[openstackLabelPublicIP] = model.LabelValue(val)
}
addr = net.JoinHostPort(addr, strconv.Itoa(i.port))
diff --git a/discovery/openstack/instance_test.go b/discovery/openstack/instance_test.go
index 2b5ac1b89e..0933b57067 100644
--- a/discovery/openstack/instance_test.go
+++ b/discovery/openstack/instance_test.go
@@ -32,6 +32,7 @@ func (s *OpenstackSDInstanceTestSuite) SetupTest(t *testing.T) {
s.Mock.HandleServerListSuccessfully()
s.Mock.HandleFloatingIPListSuccessfully()
+ s.Mock.HandlePortsListSuccessfully()
s.Mock.HandleVersionsSuccessfully()
s.Mock.HandleAuthSuccessfully()
@@ -66,7 +67,7 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Len(t, tg.Targets, 4)
+ require.Len(t, tg.Targets, 6)
for i, lbls := range []model.LabelSet{
{
@@ -119,6 +120,31 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
"__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
"__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
},
+ {
+ "__address__": model.LabelValue("10.0.0.33:0"),
+ "__meta_openstack_instance_flavor": model.LabelValue("m1.small"),
+ "__meta_openstack_instance_id": model.LabelValue("87caf8ed-d92a-41f6-9dcd-d1399e39899f"),
+ "__meta_openstack_instance_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_instance_name": model.LabelValue("merp-project2"),
+ "__meta_openstack_private_ip": model.LabelValue("10.0.0.33"),
+ "__meta_openstack_address_pool": model.LabelValue("private"),
+ "__meta_openstack_tag_env": model.LabelValue("prod"),
+ "__meta_openstack_project_id": model.LabelValue("b78fef2305934dbbbeb9a10b4c326f7a"),
+ "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
+ },
+ {
+ "__address__": model.LabelValue("10.0.0.34:0"),
+ "__meta_openstack_instance_flavor": model.LabelValue("m1.small"),
+ "__meta_openstack_instance_id": model.LabelValue("87caf8ed-d92a-41f6-9dcd-d1399e39899f"),
+ "__meta_openstack_instance_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_instance_name": model.LabelValue("merp-project2"),
+ "__meta_openstack_private_ip": model.LabelValue("10.0.0.34"),
+ "__meta_openstack_address_pool": model.LabelValue("private"),
+ "__meta_openstack_tag_env": model.LabelValue("prod"),
+ "__meta_openstack_public_ip": model.LabelValue("10.10.10.24"),
+ "__meta_openstack_project_id": model.LabelValue("b78fef2305934dbbbeb9a10b4c326f7a"),
+ "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
+ },
} {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
require.Equal(t, lbls, tg.Targets[i])
@@ -134,6 +160,5 @@ func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
- require.Error(t, err)
- require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
+ require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
}
diff --git a/discovery/openstack/loadbalancer.go b/discovery/openstack/loadbalancer.go
new file mode 100644
index 0000000000..254b713cdd
--- /dev/null
+++ b/discovery/openstack/loadbalancer.go
@@ -0,0 +1,193 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package openstack
+
+import (
+ "context"
+ "fmt"
+ "log/slog"
+ "net"
+ "strconv"
+ "strings"
+
+ "github.com/gophercloud/gophercloud/v2"
+ "github.com/gophercloud/gophercloud/v2/openstack"
+ "github.com/gophercloud/gophercloud/v2/openstack/loadbalancer/v2/listeners"
+ "github.com/gophercloud/gophercloud/v2/openstack/loadbalancer/v2/loadbalancers"
+ "github.com/gophercloud/gophercloud/v2/openstack/networking/v2/extensions/layer3/floatingips"
+ "github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
+
+ "github.com/prometheus/prometheus/discovery/targetgroup"
+)
+
+const (
+ openstackLabelLoadBalancerID = openstackLabelPrefix + "loadbalancer_id"
+ openstackLabelLoadBalancerName = openstackLabelPrefix + "loadbalancer_name"
+ openstackLabelLoadBalancerOperatingStatus = openstackLabelPrefix + "loadbalancer_operating_status"
+ openstackLabelLoadBalancerProvisioningStatus = openstackLabelPrefix + "loadbalancer_provisioning_status"
+ openstackLabelLoadBalancerAvailabilityZone = openstackLabelPrefix + "loadbalancer_availability_zone"
+ openstackLabelLoadBalancerFloatingIP = openstackLabelPrefix + "loadbalancer_floating_ip"
+ openstackLabelLoadBalancerVIP = openstackLabelPrefix + "loadbalancer_vip"
+ openstackLabelLoadBalancerProvider = openstackLabelPrefix + "loadbalancer_provider"
+ openstackLabelLoadBalancerTags = openstackLabelPrefix + "loadbalancer_tags"
+)
+
+// LoadBalancerDiscovery discovers OpenStack load balancers.
+type LoadBalancerDiscovery struct {
+ provider *gophercloud.ProviderClient
+ authOpts *gophercloud.AuthOptions
+ region string
+ logger *slog.Logger
+ availability gophercloud.Availability
+}
+
+// newLoadBalancerDiscovery returns a new load balancer discovery.
+func newLoadBalancerDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
+ region string, availability gophercloud.Availability, l *slog.Logger,
+) *LoadBalancerDiscovery {
+ if l == nil {
+ l = promslog.NewNopLogger()
+ }
+ return &LoadBalancerDiscovery{
+ provider: provider, authOpts: opts,
+ region: region, availability: availability, logger: l,
+ }
+}
+
+func (i *LoadBalancerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+ err := openstack.Authenticate(ctx, i.provider, *i.authOpts)
+ if err != nil {
+ return nil, fmt.Errorf("could not authenticate to OpenStack: %w", err)
+ }
+
+ client, err := openstack.NewLoadBalancerV2(i.provider, gophercloud.EndpointOpts{
+ Region: i.region, Availability: i.availability,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("could not create OpenStack load balancer session: %w", err)
+ }
+
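+	// A separate network (Neutron) client is needed to look up floating IPs
+	// attached to the load balancer VIP ports.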
+ networkClient, err := openstack.NewNetworkV2(i.provider, gophercloud.EndpointOpts{
+ Region: i.region, Availability: i.availability,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("could not create OpenStack network session: %w", err)
+ }
+
+ allPages, err := loadbalancers.List(client, loadbalancers.ListOpts{}).AllPages(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("failed to list load balancers: %w", err)
+ }
+
+ allLBs, err := loadbalancers.ExtractLoadBalancers(allPages)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract load balancers: %w", err)
+ }
+
+ // Fetch all listeners in one API call
+ listenerPages, err := listeners.List(client, listeners.ListOpts{}).AllPages(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("failed to list all listeners: %w", err)
+ }
+
+ allListeners, err := listeners.ExtractListeners(listenerPages)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract all listeners: %w", err)
+ }
+
+ // Create a map to group listeners by Load Balancer ID
+ listenerMap := make(map[string][]listeners.Listener)
+ for _, listener := range allListeners {
+ // Iterate through each associated Load Balancer ID in the Loadbalancers array
+ for _, lb := range listener.Loadbalancers {
+ listenerMap[lb.ID] = append(listenerMap[lb.ID], listener)
+ }
+ }
+
+ // Fetch all floating IPs
+ fipPages, err := floatingips.List(networkClient, floatingips.ListOpts{}).AllPages(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("failed to list floating IPs: %w", err)
+ }
+
+ allFIPs, err := floatingips.ExtractFloatingIPs(fipPages)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract floating IPs: %w", err)
+ }
+
+	// Index floating IPs by the port they are attached to, so that a load
+	// balancer's VIP port ID can be resolved to its floating IP address.
+	fipMap := make(map[string]string)
+ for _, fip := range allFIPs {
+ if fip.PortID != "" {
+ fipMap[fip.PortID] = fip.FloatingIP
+ }
+ }
+
+ tg := &targetgroup.Group{
+ Source: "OS_" + i.region,
+ }
+
+ for _, lb := range allLBs {
+ // Retrieve listeners for this load balancer from the map
+ lbListeners, exists := listenerMap[lb.ID]
+ if !exists || len(lbListeners) == 0 {
+ i.logger.Debug("Got no listener", "loadbalancer", lb.ID)
+ continue
+ }
+
+ // Variable to store the port of the first PROMETHEUS listener
+ var listenerPort int
+ hasPrometheusListener := false
+
+ // Check if any listener has the PROMETHEUS protocol
+ for _, listener := range lbListeners {
+ if listener.Protocol == "PROMETHEUS" {
+ hasPrometheusListener = true
+ listenerPort = listener.ProtocolPort
+ break
+ }
+ }
+
+		// Skip load balancers without a PROMETHEUS listener
+ if !hasPrometheusListener {
+ i.logger.Debug("Got no PROMETHEUS listener", "loadbalancer", lb.ID)
+ continue
+ }
+
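+		// Expose the VIP address together with the PROMETHEUS listener port as
+		// the scrape address.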
+ labels := model.LabelSet{}
+ addr := net.JoinHostPort(lb.VipAddress, strconv.Itoa(listenerPort))
+ labels[model.AddressLabel] = model.LabelValue(addr)
+ labels[openstackLabelLoadBalancerID] = model.LabelValue(lb.ID)
+ labels[openstackLabelLoadBalancerName] = model.LabelValue(lb.Name)
+ labels[openstackLabelLoadBalancerOperatingStatus] = model.LabelValue(lb.OperatingStatus)
+ labels[openstackLabelLoadBalancerProvisioningStatus] = model.LabelValue(lb.ProvisioningStatus)
+ labels[openstackLabelLoadBalancerAvailabilityZone] = model.LabelValue(lb.AvailabilityZone)
+ labels[openstackLabelLoadBalancerVIP] = model.LabelValue(lb.VipAddress)
+ labels[openstackLabelLoadBalancerProvider] = model.LabelValue(lb.Provider)
+ labels[openstackLabelProjectID] = model.LabelValue(lb.ProjectID)
+
+ if len(lb.Tags) > 0 {
+ labels[openstackLabelLoadBalancerTags] = model.LabelValue(strings.Join(lb.Tags, ","))
+ }
+
+ if floatingIP, exists := fipMap[lb.VipPortID]; exists {
+ labels[openstackLabelLoadBalancerFloatingIP] = model.LabelValue(floatingIP)
+ }
+
+ tg.Targets = append(tg.Targets, labels)
+ }
+
+ return []*targetgroup.Group{tg}, nil
+}
diff --git a/discovery/openstack/loadbalancer_test.go b/discovery/openstack/loadbalancer_test.go
new file mode 100644
index 0000000000..eee21b9831
--- /dev/null
+++ b/discovery/openstack/loadbalancer_test.go
@@ -0,0 +1,137 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package openstack
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ "github.com/prometheus/common/model"
+ "github.com/stretchr/testify/require"
+)
+
+type OpenstackSDLoadBalancerTestSuite struct {
+ Mock *SDMock
+}
+
+func (s *OpenstackSDLoadBalancerTestSuite) SetupTest(t *testing.T) {
+ s.Mock = NewSDMock(t)
+ s.Mock.Setup()
+
+ s.Mock.HandleLoadBalancerListSuccessfully()
+ s.Mock.HandleListenersListSuccessfully()
+ s.Mock.HandleFloatingIPListSuccessfully()
+
+ s.Mock.HandleVersionsSuccessfully()
+ s.Mock.HandleAuthSuccessfully()
+}
+
+func (s *OpenstackSDLoadBalancerTestSuite) openstackAuthSuccess() (refresher, error) {
+ conf := SDConfig{
+ IdentityEndpoint: s.Mock.Endpoint(),
+ Password: "test",
+ Username: "test",
+ DomainName: "12345",
+ Region: "RegionOne",
+ Role: "loadbalancer",
+ }
+ return newRefresher(&conf, nil)
+}
+
+func TestOpenstackSDLoadBalancerRefresh(t *testing.T) {
+ mock := &OpenstackSDLoadBalancerTestSuite{}
+ mock.SetupTest(t)
+
+ instance, err := mock.openstackAuthSuccess()
+ require.NoError(t, err)
+
+ ctx := context.Background()
+ tgs, err := instance.refresh(ctx)
+
+ require.NoError(t, err)
+ require.Len(t, tgs, 1)
+
+ tg := tgs[0]
+ require.NotNil(t, tg)
+ require.NotNil(t, tg.Targets)
+ require.Len(t, tg.Targets, 4)
+
+ for i, lbls := range []model.LabelSet{
+ {
+ "__address__": model.LabelValue("10.0.0.32:9273"),
+ "__meta_openstack_loadbalancer_id": model.LabelValue("ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"),
+ "__meta_openstack_loadbalancer_name": model.LabelValue("lb1"),
+ "__meta_openstack_loadbalancer_operating_status": model.LabelValue("ONLINE"),
+ "__meta_openstack_loadbalancer_provisioning_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_loadbalancer_availability_zone": model.LabelValue("az1"),
+ "__meta_openstack_loadbalancer_floating_ip": model.LabelValue("192.168.1.2"),
+ "__meta_openstack_loadbalancer_vip": model.LabelValue("10.0.0.32"),
+ "__meta_openstack_loadbalancer_provider": model.LabelValue("amphora"),
+ "__meta_openstack_loadbalancer_tags": model.LabelValue("tag1,tag2"),
+ "__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
+ },
+ {
+ "__address__": model.LabelValue("10.0.2.78:8080"),
+ "__meta_openstack_loadbalancer_id": model.LabelValue("d92c471e-8d3e-4b9f-b2b5-9c72a9e3ef54"),
+ "__meta_openstack_loadbalancer_name": model.LabelValue("lb3"),
+ "__meta_openstack_loadbalancer_operating_status": model.LabelValue("ONLINE"),
+ "__meta_openstack_loadbalancer_provisioning_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_loadbalancer_availability_zone": model.LabelValue("az3"),
+ "__meta_openstack_loadbalancer_floating_ip": model.LabelValue("192.168.3.4"),
+ "__meta_openstack_loadbalancer_vip": model.LabelValue("10.0.2.78"),
+ "__meta_openstack_loadbalancer_provider": model.LabelValue("amphora"),
+ "__meta_openstack_loadbalancer_tags": model.LabelValue("tag5,tag6"),
+ "__meta_openstack_project_id": model.LabelValue("ac57f03dba1a4fdebff3e67201bc7a85"),
+ },
+ {
+ "__address__": model.LabelValue("10.0.3.99:9090"),
+ "__meta_openstack_loadbalancer_id": model.LabelValue("f5c7e918-df38-4a5a-a7d4-d9c27ab2cf67"),
+ "__meta_openstack_loadbalancer_name": model.LabelValue("lb4"),
+ "__meta_openstack_loadbalancer_operating_status": model.LabelValue("ONLINE"),
+ "__meta_openstack_loadbalancer_provisioning_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_loadbalancer_availability_zone": model.LabelValue("az1"),
+ "__meta_openstack_loadbalancer_floating_ip": model.LabelValue("192.168.4.5"),
+ "__meta_openstack_loadbalancer_vip": model.LabelValue("10.0.3.99"),
+ "__meta_openstack_loadbalancer_provider": model.LabelValue("amphora"),
+ "__meta_openstack_project_id": model.LabelValue("fa8c372dfe4d4c92b0c4e3a2d9b3c9fa"),
+ },
+ {
+ "__address__": model.LabelValue("10.0.4.88:9876"),
+ "__meta_openstack_loadbalancer_id": model.LabelValue("e83a6d92-7a3e-4567-94b3-20c83b32a75e"),
+ "__meta_openstack_loadbalancer_name": model.LabelValue("lb5"),
+ "__meta_openstack_loadbalancer_operating_status": model.LabelValue("ONLINE"),
+ "__meta_openstack_loadbalancer_provisioning_status": model.LabelValue("ACTIVE"),
+ "__meta_openstack_loadbalancer_availability_zone": model.LabelValue("az4"),
+ "__meta_openstack_loadbalancer_vip": model.LabelValue("10.0.4.88"),
+ "__meta_openstack_loadbalancer_provider": model.LabelValue("amphora"),
+ "__meta_openstack_project_id": model.LabelValue("a5d3b2e1e6f34cd9a5f7c2f01a6b8e29"),
+ },
+ } {
+ t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
+ require.Equal(t, lbls, tg.Targets[i])
+ })
+ }
+}
+
+func TestOpenstackSDLoadBalancerRefreshWithDoneContext(t *testing.T) {
+ mock := &OpenstackSDLoadBalancerTestSuite{}
+ mock.SetupTest(t)
+
+ loadbalancer, _ := mock.openstackAuthSuccess()
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel()
+ _, err := loadbalancer.refresh(ctx)
+ require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
+}
diff --git a/discovery/openstack/mock_test.go b/discovery/openstack/mock_test.go
index 4518f41166..34e09c710f 100644
--- a/discovery/openstack/mock_test.go
+++ b/discovery/openstack/mock_test.go
@@ -62,7 +62,7 @@ func testHeader(t *testing.T, r *http.Request, header, expected string) {
// HandleVersionsSuccessfully mocks version call.
func (m *SDMock) HandleVersionsSuccessfully() {
- m.Mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+ m.Mux.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintf(w, `
{
"versions": {
@@ -90,7 +90,7 @@ func (m *SDMock) HandleVersionsSuccessfully() {
// HandleAuthSuccessfully mocks auth call.
func (m *SDMock) HandleAuthSuccessfully() {
- m.Mux.HandleFunc("/v3/auth/tokens", func(w http.ResponseWriter, r *http.Request) {
+ m.Mux.HandleFunc("/v3/auth/tokens", func(w http.ResponseWriter, _ *http.Request) {
w.Header().Add("X-Subject-Token", tokenID)
w.WriteHeader(http.StatusCreated)
@@ -124,7 +124,7 @@ func (m *SDMock) HandleAuthSuccessfully() {
"type": "identity",
"name": "keystone"
},
- {
+ {
"endpoints": [
{
"id": "e2ffee808abc4a60916715b1d4b489dd",
@@ -136,8 +136,33 @@ func (m *SDMock) HandleAuthSuccessfully() {
],
"id": "b7f2a5b1a019459cb956e43a8cb41e31",
"type": "compute"
+ },
+ {
+ "endpoints": [
+ {
+ "id": "5448e46679564d7d95466c2bef54c296",
+ "interface": "public",
+ "region": "RegionOne",
+ "region_id": "RegionOne",
+ "url": "%s"
+ }
+ ],
+ "id": "589f3d99a3d94f5f871e9f5cf206d2e8",
+ "type": "network"
+ },
+ {
+ "endpoints": [
+ {
+ "id": "39dc322ce86c1234b4f06c2eeae0841b",
+ "interface": "public",
+ "region": "RegionOne",
+ "region_id": "RegionOne",
+ "url": "%s"
+ }
+ ],
+ "id": "26968f704a68417bbddd29508455ff90",
+ "type": "load-balancer"
}
-
],
"expires_at": "2013-02-27T18:30:59.999999Z",
"is_domain": false,
@@ -174,7 +199,7 @@ func (m *SDMock) HandleAuthSuccessfully() {
}
}
}
- `, m.Endpoint())
+ `, m.Endpoint(), m.Endpoint(), m.Endpoint())
})
}
@@ -461,82 +486,159 @@ const serverListBody = `
"metadata": {}
},
{
- "status": "ACTIVE",
- "updated": "2014-09-25T13:04:49Z",
- "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362",
- "OS-EXT-SRV-ATTR:host": "devstack",
- "addresses": {
- "private": [
- {
- "version": 4,
- "addr": "10.0.0.33",
- "OS-EXT-IPS:type": "fixed"
- },
- {
- "version": 4,
- "addr": "10.0.0.34",
- "OS-EXT-IPS:type": "fixed"
- },
- {
- "version": 4,
- "addr": "10.10.10.4",
- "OS-EXT-IPS:type": "floating"
- }
- ]
- },
- "links": [
- {
- "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
- "rel": "self"
+ "status": "ACTIVE",
+ "updated": "2014-09-25T13:04:49Z",
+ "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362",
+ "OS-EXT-SRV-ATTR:host": "devstack",
+ "addresses": {
+ "private": [
+ {
+ "version": 4,
+ "addr": "10.0.0.33",
+ "OS-EXT-IPS:type": "fixed"
+ },
+ {
+ "version": 4,
+ "addr": "10.0.0.34",
+ "OS-EXT-IPS:type": "fixed"
+ },
+ {
+ "version": 4,
+ "addr": "10.10.10.4",
+ "OS-EXT-IPS:type": "floating"
+ }
+ ]
},
- {
- "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
- "rel": "bookmark"
- }
- ],
- "key_name": null,
- "image": "",
- "OS-EXT-STS:task_state": null,
- "OS-EXT-STS:vm_state": "active",
- "OS-EXT-SRV-ATTR:instance_name": "instance-0000001d",
- "OS-SRV-USG:launched_at": "2014-09-25T13:04:49.000000",
- "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack",
- "flavor": {
- "vcpus": 2,
- "ram": 4096,
- "disk": 0,
- "ephemeral": 0,
- "swap": 0,
- "original_name": "m1.small",
- "extra_specs": {
- "aggregate_instance_extra_specs:general": "true",
- "hw:mem_page_size": "large",
- "hw:vif_multiqueue_enabled": "true"
+ "links": [
+ {
+ "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
+ "rel": "self"
+ },
+ {
+ "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
+ "rel": "bookmark"
+ }
+ ],
+ "key_name": null,
+ "image": "",
+ "OS-EXT-STS:task_state": null,
+ "OS-EXT-STS:vm_state": "active",
+ "OS-EXT-SRV-ATTR:instance_name": "instance-0000001d",
+ "OS-SRV-USG:launched_at": "2014-09-25T13:04:49.000000",
+ "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack",
+ "flavor": {
+ "vcpus": 2,
+ "ram": 4096,
+ "disk": 0,
+ "ephemeral": 0,
+ "swap": 0,
+ "original_name": "m1.small",
+ "extra_specs": {
+ "aggregate_instance_extra_specs:general": "true",
+ "hw:mem_page_size": "large",
+ "hw:vif_multiqueue_enabled": "true"
+ }
+ },
+ "id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb",
+ "security_groups": [
+ {
+ "name": "default"
+ }
+ ],
+ "OS-SRV-USG:terminated_at": null,
+ "OS-EXT-AZ:availability_zone": "nova",
+ "user_id": "9349aff8be7545ac9d2f1d00999a23cd",
+ "name": "merp",
+ "created": "2014-09-25T13:04:41Z",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "OS-DCF:diskConfig": "MANUAL",
+ "os-extended-volumes:volumes_attached": [],
+ "accessIPv4": "",
+ "accessIPv6": "",
+ "progress": 0,
+ "OS-EXT-STS:power_state": 1,
+ "config_drive": "",
+ "metadata": {
+ "env": "prod"
}
},
- "id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb",
- "security_groups": [
- {
- "name": "default"
+ {
+ "status": "ACTIVE",
+ "updated": "2014-09-25T13:04:49Z",
+ "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362",
+ "OS-EXT-SRV-ATTR:host": "devstack",
+ "addresses": {
+ "private": [
+ {
+ "version": 4,
+ "addr": "10.0.0.33",
+ "OS-EXT-IPS:type": "fixed"
+ },
+ {
+ "version": 4,
+ "addr": "10.0.0.34",
+ "OS-EXT-IPS:type": "fixed"
+ },
+ {
+ "version": 4,
+ "addr": "10.10.10.24",
+ "OS-EXT-IPS:type": "floating"
+ }
+ ]
+ },
+ "links": [
+ {
+ "href": "http://104.130.131.164:8774/v2/b78fef2305934dbbbeb9a10b4c326f7a/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
+ "rel": "self"
+ },
+ {
+ "href": "http://104.130.131.164:8774/b78fef2305934dbbbeb9a10b4c326f7a/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba",
+ "rel": "bookmark"
+ }
+ ],
+ "key_name": null,
+ "image": "",
+ "OS-EXT-STS:task_state": null,
+ "OS-EXT-STS:vm_state": "active",
+ "OS-EXT-SRV-ATTR:instance_name": "instance-0000002d",
+ "OS-SRV-USG:launched_at": "2014-09-25T13:04:49.000000",
+ "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack",
+ "flavor": {
+ "vcpus": 2,
+ "ram": 4096,
+ "disk": 0,
+ "ephemeral": 0,
+ "swap": 0,
+ "original_name": "m1.small",
+ "extra_specs": {
+ "aggregate_instance_extra_specs:general": "true",
+ "hw:mem_page_size": "large",
+ "hw:vif_multiqueue_enabled": "true"
+ }
+ },
+ "id": "87caf8ed-d92a-41f6-9dcd-d1399e39899f",
+ "security_groups": [
+ {
+ "name": "default"
+ }
+ ],
+ "OS-SRV-USG:terminated_at": null,
+ "OS-EXT-AZ:availability_zone": "nova",
+ "user_id": "9349aff8be7545ac9d2f1d00999a23cd",
+ "name": "merp-project2",
+ "created": "2014-09-25T13:04:41Z",
+ "tenant_id": "b78fef2305934dbbbeb9a10b4c326f7a",
+ "OS-DCF:diskConfig": "MANUAL",
+ "os-extended-volumes:volumes_attached": [],
+ "accessIPv4": "",
+ "accessIPv6": "",
+ "progress": 0,
+ "OS-EXT-STS:power_state": 1,
+ "config_drive": "",
+ "metadata": {
+ "env": "prod"
}
- ],
- "OS-SRV-USG:terminated_at": null,
- "OS-EXT-AZ:availability_zone": "nova",
- "user_id": "9349aff8be7545ac9d2f1d00999a23cd",
- "name": "merp",
- "created": "2014-09-25T13:04:41Z",
- "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
- "OS-DCF:diskConfig": "MANUAL",
- "os-extended-volumes:volumes_attached": [],
- "accessIPv4": "",
- "accessIPv6": "",
- "progress": 0,
- "OS-EXT-STS:power_state": 1,
- "config_drive": "",
- "metadata": {
- "env": "prod"
}
- }
]
}
`
@@ -554,35 +656,139 @@ func (m *SDMock) HandleServerListSuccessfully() {
const listOutput = `
{
- "floating_ips": [
- {
- "fixed_ip": null,
- "id": "1",
- "instance_id": null,
- "ip": "10.10.10.1",
- "pool": "nova"
- },
- {
- "fixed_ip": "10.0.0.32",
- "id": "2",
- "instance_id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5",
- "ip": "10.10.10.2",
- "pool": "nova"
- },
- {
- "fixed_ip": "10.0.0.34",
- "id": "3",
- "instance_id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb",
- "ip": "10.10.10.4",
- "pool": "nova"
- }
- ]
+ "floatingips": [
+ {
+ "id": "03a77860-ae03-46c4-b502-caea11467a79",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "floating_ip_address": "10.10.10.1",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "d5597901-48c8-4a69-a041-cfc5be158a04",
+ "fixed_ip_address": null,
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2023-08-30T16:30:27Z",
+ "updated_at": "2023-08-30T16:30:28Z"
+ },
+ {
+ "id": "03e28c79-5a4c-491e-a4fe-3ff6bba830c6",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "floating_ip_address": "10.10.10.2",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "4a45b012-0478-484d-8cf3-c8abdb194d08",
+ "fixed_ip_address": "10.0.0.32",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2023-09-06T15:45:36Z",
+ "updated_at": "2023-09-06T15:45:36Z"
+ },
+ {
+ "id": "087fcdd2-1d13-4f72-9c0e-c759e796d558",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "floating_ip_address": "10.10.10.4",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "a0e244e8-7910-4427-b8d1-20470cad4f8a",
+ "fixed_ip_address": "10.0.0.34",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2024-01-24T13:30:50Z",
+ "updated_at": "2024-01-24T13:30:51Z"
+ },
+ {
+ "id": "b23df91a-a74a-4f75-b252-750aff4a5a0c",
+ "tenant_id": "b78fef2305934dbbbeb9a10b4c326f7a",
+ "floating_ip_address": "10.10.10.24",
+ "floating_network_id": "b19ff5bc-a49a-46cc-8d14-ca5f1e94791f",
+ "router_id": "65a5e5af-17f0-4124-9a81-c08b44f5b8a7",
+ "port_id": "b926ab68-ec54-46d8-8c50-1c07aafd5ae9",
+ "fixed_ip_address": "10.0.0.34",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2024-01-24T13:30:50Z",
+ "updated_at": "2024-01-24T13:30:51Z"
+ },
+ {
+ "id": "fea7332d-9027-4cf9-bf62-c3c4c6ebaf84",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "floating_ip_address": "192.168.1.2",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "b47c39f5-238d-4b17-ae87-9b5d19af8a2e",
+ "fixed_ip_address": "10.0.0.32",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2023-08-30T15:11:37Z",
+ "updated_at": "2023-08-30T15:11:38Z",
+ "revision_number": 1,
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "febb9554-cf83-4f9b-94d9-1b3c34be357f",
+ "tenant_id": "ac57f03dba1a4fdebff3e67201bc7a85",
+ "floating_ip_address": "192.168.3.4",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "c83b6e12-4e5d-4673-a4b3-5bc72a7f3ef9",
+ "fixed_ip_address": "10.0.2.78",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2023-08-30T15:11:37Z",
+ "updated_at": "2023-08-30T15:11:38Z",
+ "revision_number": 1,
+ "project_id": "ac57f03dba1a4fdebff3e67201bc7a85"
+ },
+ {
+ "id": "febb9554-cf83-4f9b-94d9-1b3c34be357f",
+ "tenant_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa",
+ "floating_ip_address": "192.168.4.5",
+ "floating_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "router_id": "f03af93b-4e8f-4f55-adcf-a0317782ede2",
+ "port_id": "f9e8b6e12-7e4d-4963-a5b3-6cd82a7f3ff6",
+ "fixed_ip_address": "10.0.3.99",
+ "status": "ACTIVE",
+ "description": "",
+ "dns_domain": "",
+ "dns_name": "",
+ "port_forwardings": [],
+ "tags": [],
+ "created_at": "2023-08-30T15:11:37Z",
+ "updated_at": "2023-08-30T15:11:38Z",
+ "revision_number": 1,
+ "project_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa"
+ }
+ ]
}
`
// HandleFloatingIPListSuccessfully mocks floating ips call.
func (m *SDMock) HandleFloatingIPListSuccessfully() {
- m.Mux.HandleFunc("/os-floating-ips", func(w http.ResponseWriter, r *http.Request) {
+ m.Mux.HandleFunc("/v2.0/floatingips", func(w http.ResponseWriter, r *http.Request) {
testMethod(m.t, r, http.MethodGet)
testHeader(m.t, r, "X-Auth-Token", tokenID)
@@ -590,3 +796,608 @@ func (m *SDMock) HandleFloatingIPListSuccessfully() {
fmt.Fprint(w, listOutput)
})
}
+
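+// portsListBody is a Neutron ports list fixture; each device_id links the port
+// back to the owning resource referenced elsewhere in these fixtures.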
+const portsListBody = `
+{
+ "ports": [
+ {
+ "id": "d5597901-48c8-4a69-a041-cfc5be158a04",
+ "name": "",
+ "network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "mac_address": "",
+ "admin_state_up": true,
+ "status": "DOWN",
+ "device_id": "",
+ "device_owner": "",
+ "fixed_ips": [],
+ "allowed_address_pairs": [],
+ "extra_dhcp_opts": [],
+ "security_groups": [],
+ "description": "",
+ "binding:vnic_type": "normal",
+ "port_security_enabled": true,
+ "dns_name": "",
+ "dns_assignment": [],
+ "dns_domain": "",
+ "tags": [],
+ "created_at": "2023-08-30T16:30:27Z",
+ "updated_at": "2023-08-30T16:30:28Z",
+ "revision_number": 0,
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "4a45b012-0478-484d-8cf3-c8abdb194d08",
+ "name": "ovn-lb-vip-0980c8de-58c3-481d-89e3-ed81f44286c0",
+ "network_id": "03200a39-b399-44f3-a778-6dbb93343a31",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "mac_address": "fa:16:3e:23:12:a3",
+ "admin_state_up": true,
+ "status": "ACTIVE",
+ "device_id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5",
+ "device_owner": "",
+ "fixed_ips": [
+ {
+ "subnet_id": "",
+ "ip_address": "10.10.10.2"
+ }
+ ],
+ "allowed_address_pairs": [],
+ "extra_dhcp_opts": [],
+ "security_groups": [],
+ "description": "",
+ "binding:vnic_type": "normal",
+ "port_security_enabled": true,
+ "dns_name": "",
+ "dns_assignment": [],
+ "dns_domain": "",
+ "tags": [],
+ "created_at": "2023-09-06T15:45:36Z",
+ "updated_at": "2023-09-06T15:45:36Z",
+ "revision_number": 0,
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "a0e244e8-7910-4427-b8d1-20470cad4f8a",
+ "name": "ovn-lb-vip-26c0ccb1-3036-4345-99e8-d8f34a8ba6b2",
+ "network_id": "03200a39-b399-44f3-a778-6dbb93343a31",
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "mac_address": "fa:16:3e:5f:43:10",
+ "admin_state_up": true,
+ "status": "ACTIVE",
+ "device_id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb",
+ "device_owner": "",
+ "fixed_ips": [
+ {
+ "subnet_id": "",
+ "ip_address": "10.10.10.4"
+ }
+ ],
+ "allowed_address_pairs": [],
+ "extra_dhcp_opts": [],
+ "security_groups": [],
+ "description": "",
+ "binding:vnic_type": "normal",
+ "port_security_enabled": true,
+ "dns_name": "",
+ "dns_assignment": [],
+ "dns_domain": "",
+ "tags": [],
+ "created_at": "2024-01-24T13:30:50Z",
+ "updated_at": "2024-01-24T13:30:51Z",
+ "revision_number": 0,
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "b926ab68-ec54-46d8-8c50-1c07aafd5ae9",
+ "name": "dummy-port",
+ "network_id": "03200a39-b399-44f3-a778-6dbb93343a31",
+ "tenant_id": "b78fef2305934dbbbeb9a10b4c326f7a",
+ "mac_address": "fa:16:3e:5f:12:10",
+ "admin_state_up": true,
+ "status": "ACTIVE",
+ "device_id": "87caf8ed-d92a-41f6-9dcd-d1399e39899f",
+ "device_owner": "",
+ "fixed_ips": [
+ {
+ "subnet_id": "",
+ "ip_address": "10.10.10.24"
+ }
+ ],
+ "allowed_address_pairs": [],
+ "extra_dhcp_opts": [],
+ "security_groups": [],
+ "description": "",
+ "binding:vnic_type": "normal",
+ "port_security_enabled": true,
+ "dns_name": "",
+ "dns_assignment": [],
+ "dns_domain": "",
+ "tags": [],
+ "created_at": "2024-01-24T13:30:50Z",
+ "updated_at": "2024-01-24T13:30:51Z",
+ "revision_number": 0,
+ "project_id": "b78fef2305934dbbbeb9a10b4c326f7a"
+ }
+ ]
+}
+`
+
+// HandlePortsListSuccessfully mocks the ports list API.
+func (m *SDMock) HandlePortsListSuccessfully() {
+ m.Mux.HandleFunc("/v2.0/ports", func(w http.ResponseWriter, r *http.Request) {
+ testMethod(m.t, r, http.MethodGet)
+ testHeader(m.t, r, "X-Auth-Token", tokenID)
+
+ w.Header().Add("Content-Type", "application/json")
+ fmt.Fprint(w, portsListBody)
+ })
+}
+
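+// lbListBody is an Octavia load balancer list fixture; the vip_port_id values
+// line up with port_id entries in the floating IP fixture so public IPs can be
+// resolved (lb5 intentionally has no matching floating IP).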
+const lbListBody = `
+{
+ "loadbalancers": [
+ {
+ "id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5",
+ "name": "lb1",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "created_at": "2024-12-01T10:00:00",
+ "updated_at": "2024-12-01T10:30:00",
+ "vip_address": "10.0.0.32",
+ "vip_port_id": "b47c39f5-238d-4b17-ae87-9b5d19af8a2e",
+ "vip_subnet_id": "14a4c6a5-fe71-4a94-9071-4cd12fb8337f",
+ "vip_network_id": "d02c4f18-d606-4864-b12a-1c9b39a46be2",
+ "tags": ["tag1", "tag2"],
+ "availability_zone": "az1",
+ "vip_vnic_type": "normal",
+ "provider": "amphora",
+ "listeners": [
+ {
+ "id": "c4146b54-febc-4caf-a53f-ed1cab6faba5"
+ },
+ {
+ "id": "a058d20e-82de-4eff-bb65-5c76a8554435"
+ }
+ ],
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "d92c471e-8d3e-4b9f-b2b5-9c72a9e3ef54",
+ "name": "lb3",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "project_id": "ac57f03dba1a4fdebff3e67201bc7a85",
+ "created_at": "2024-12-01T12:00:00",
+ "updated_at": "2024-12-01T12:45:00",
+ "vip_address": "10.0.2.78",
+ "vip_port_id": "c83b6e12-4e5d-4673-a4b3-5bc72a7f3ef9",
+ "vip_subnet_id": "36c5e9f6-e7a2-4975-a8c6-3b8e4f93cf45",
+ "vip_network_id": "g03c6f27-e617-4975-c8f7-4c9f3f94cf68",
+ "tags": ["tag5", "tag6"],
+ "availability_zone": "az3",
+ "vip_vnic_type": "normal",
+ "provider": "amphora",
+ "listeners": [
+ {
+ "id": "5b9529a4-6cbf-48f8-a006-d99cbc717da0"
+ },
+ {
+ "id": "5d26333b-74d1-4b2a-90ab-2b2c0f5a8048"
+ }
+ ],
+ "tenant_id": "ac57f03dba1a4fdebff3e67201bc7a85"
+ },
+ {
+ "id": "f5c7e918-df38-4a5a-a7d4-d9c27ab2cf67",
+ "name": "lb4",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "project_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa",
+ "created_at": "2024-12-01T13:00:00",
+ "updated_at": "2024-12-01T13:20:00",
+ "vip_address": "10.0.3.99",
+ "vip_port_id": "f9e8b6e12-7e4d-4963-a5b3-6cd82a7f3ff6",
+ "vip_subnet_id": "47d6f8f9-f7b2-4876-a9d8-4e8f4g95df79",
+ "vip_network_id": "h04d7f38-f718-4876-d9g8-5d8g5h95df89",
+ "tags": [],
+ "availability_zone": "az1",
+ "vip_vnic_type": "normal",
+ "provider": "amphora",
+ "listeners": [
+ {
+ "id": "84c87596-1ff0-4f6d-b151-0a78e1f407a3"
+ },
+ {
+ "id": "fe460a7c-16a9-4984-9fe6-f6e5153ebab1"
+ }
+ ],
+ "tenant_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa"
+ },
+ {
+ "id": "e83a6d92-7a3e-4567-94b3-20c83b32a75e",
+ "name": "lb5",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "project_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29",
+ "created_at": "2024-12-01T11:00:00",
+ "updated_at": "2024-12-01T11:15:00",
+ "vip_address": "10.0.4.88",
+ "vip_port_id": "d83a6d92-7a3e-4567-94b3-20c83b32a75e",
+ "vip_subnet_id": "25b4d8e5-fe81-4a87-9071-4cc12fb8337f",
+ "vip_network_id": "f02c5e19-c507-4864-b16e-2b7a39e56be3",
+ "tags": [],
+ "availability_zone": "az4",
+ "vip_vnic_type": "normal",
+ "provider": "amphora",
+ "listeners": [
+ {
+ "id": "50902e62-34b8-46b2-9ed4-9053e7ad46dc"
+ },
+ {
+ "id": "98a867ad-ff07-4880-b05f-32088866a68a"
+ }
+ ],
+ "tenant_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29"
+ }
+ ]
+}
+`
+
+// HandleLoadBalancerListSuccessfully mocks the load balancer list API.
+func (m *SDMock) HandleLoadBalancerListSuccessfully() {
+ m.Mux.HandleFunc("/v2.0/lbaas/loadbalancers", func(w http.ResponseWriter, r *http.Request) {
+ testMethod(m.t, r, http.MethodGet)
+ testHeader(m.t, r, "X-Auth-Token", tokenID)
+
+ w.Header().Add("Content-Type", "application/json")
+ fmt.Fprint(w, lbListBody)
+ })
+}
+
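+// listenerListBody mixes PROMETHEUS and TCP listeners per load balancer so
+// that only the PROMETHEUS listener port is used as the scrape address.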
+const listenerListBody = `
+{
+ "listeners": [
+ {
+ "id": "c4146b54-febc-4caf-a53f-ed1cab6faba5",
+ "name": "stats-listener",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "PROMETHEUS",
+ "protocol_port": 9273,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "fcad67a6189847c4aecfa3c81a05783b",
+ "default_pool_id": null,
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-08-29T18:05:24",
+ "updated_at": "2024-12-04T21:21:10",
+ "loadbalancers": [
+ {
+ "id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "fcad67a6189847c4aecfa3c81a05783b"
+ },
+ {
+ "id": "5b9529a4-6cbf-48f8-a006-d99cbc717da0",
+ "name": "stats-listener2",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "PROMETHEUS",
+ "protocol_port": 8080,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "ac57f03dba1a4fdebff3e67201bc7a85",
+ "default_pool_id": null,
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-08-29T18:05:24",
+ "updated_at": "2024-12-04T21:21:10",
+ "loadbalancers": [
+ {
+ "id": "d92c471e-8d3e-4b9f-b2b5-9c72a9e3ef54"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "ac57f03dba1a4fdebff3e67201bc7a85"
+ },
+ {
+ "id": "84c87596-1ff0-4f6d-b151-0a78e1f407a3",
+ "name": "stats-listener3",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "PROMETHEUS",
+ "protocol_port": 9090,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa",
+ "default_pool_id": null,
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-08-29T18:05:24",
+ "updated_at": "2024-12-04T21:21:10",
+ "loadbalancers": [
+ {
+ "id": "f5c7e918-df38-4a5a-a7d4-d9c27ab2cf67"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa"
+ },
+ {
+ "id": "50902e62-34b8-46b2-9ed4-9053e7ad46dc",
+ "name": "stats-listener4",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "PROMETHEUS",
+ "protocol_port": 9876,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29",
+ "default_pool_id": null,
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-08-29T18:05:24",
+ "updated_at": "2024-12-04T21:21:10",
+ "loadbalancers": [
+ {
+ "id": "e83a6d92-7a3e-4567-94b3-20c83b32a75e"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29"
+ },
+ {
+ "id": "a058d20e-82de-4eff-bb65-5c76a8554435",
+ "name": "port6443",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "TCP",
+ "protocol_port": 6443,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29",
+ "default_pool_id": "5643208b-b691-4b1f-a6b8-356f14903e56",
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-10-02T19:32:48",
+ "updated_at": "2024-12-04T21:44:34",
+ "loadbalancers": [
+ {
+ "id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29"
+ },
+ {
+ "id": "5d26333b-74d1-4b2a-90ab-2b2c0f5a8048",
+ "name": "port6444",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "TCP",
+ "protocol_port": 6444,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "ac57f03dba1a4fdebff3e67201bc7a85",
+ "default_pool_id": "5643208b-b691-4b1f-a6b8-356f14903e56",
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-10-02T19:32:48",
+ "updated_at": "2024-12-04T21:44:34",
+ "loadbalancers": [
+ {
+ "id": "d92c471e-8d3e-4b9f-b2b5-9c72a9e3ef54"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "ac57f03dba1a4fdebff3e67201bc7a85"
+ },
+ {
+ "id": "fe460a7c-16a9-4984-9fe6-f6e5153ebab1",
+ "name": "port6445",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "TCP",
+ "protocol_port": 6445,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa",
+ "default_pool_id": "5643208b-b691-4b1f-a6b8-356f14903e56",
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-10-02T19:32:48",
+ "updated_at": "2024-12-04T21:44:34",
+ "loadbalancers": [
+ {
+ "id": "f5c7e918-df38-4a5a-a7d4-d9c27ab2cf67"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "fa8c372dfe4d4c92b0c4e3a2d9b3c9fa"
+ },
+ {
+ "id": "98a867ad-ff07-4880-b05f-32088866a68a",
+ "name": "port6446",
+ "description": "",
+ "provisioning_status": "ACTIVE",
+ "operating_status": "ONLINE",
+ "admin_state_up": true,
+ "protocol": "TCP",
+ "protocol_port": 6446,
+ "connection_limit": -1,
+ "default_tls_container_ref": null,
+ "sni_container_refs": [],
+ "project_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29",
+ "default_pool_id": "5643208b-b691-4b1f-a6b8-356f14903e56",
+ "l7policies": [],
+ "insert_headers": {},
+ "created_at": "2024-10-02T19:32:48",
+ "updated_at": "2024-12-04T21:44:34",
+ "loadbalancers": [
+ {
+ "id": "e83a6d92-7a3e-4567-94b3-20c83b32a75e"
+ }
+ ],
+ "timeout_client_data": 50000,
+ "timeout_member_connect": 5000,
+ "timeout_member_data": 50000,
+ "timeout_tcp_inspect": 0,
+ "tags": [],
+ "client_ca_tls_container_ref": null,
+ "client_authentication": "NONE",
+ "client_crl_container_ref": null,
+ "allowed_cidrs": null,
+ "tls_ciphers": null,
+ "tls_versions": null,
+ "alpn_protocols": null,
+ "hsts_max_age": null,
+ "hsts_include_subdomains": null,
+ "hsts_preload": null,
+ "tenant_id": "a5d3b2e1e6f34cd9a5f7c2f01a6b8e29"
+ }
+ ]
+}
+`
+
+// HandleListenersListSuccessfully mocks the listeners endpoint.
+func (m *SDMock) HandleListenersListSuccessfully() {
+ m.Mux.HandleFunc("/v2.0/lbaas/listeners", func(w http.ResponseWriter, r *http.Request) {
+ testMethod(m.t, r, http.MethodGet)
+ testHeader(m.t, r, "X-Auth-Token", tokenID)
+
+ w.Header().Add("Content-Type", "application/json")
+ fmt.Fprint(w, listenerListBody)
+ })
+}
diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go
index c98f78788d..d7b58787a1 100644
--- a/discovery/openstack/openstack.go
+++ b/discovery/openstack/openstack.go
@@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net/http"
"time"
- "github.com/go-kit/log"
- "github.com/gophercloud/gophercloud"
- "github.com/gophercloud/gophercloud/openstack"
+ "github.com/gophercloud/gophercloud/v2"
+ "github.com/gophercloud/gophercloud/v2/openstack"
"github.com/mwitkow/go-conntrack"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -67,7 +67,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &openstackMetrics{
refreshMetrics: rmi,
}
@@ -97,6 +97,9 @@ const (
// OpenStack document reference
// https://docs.openstack.org/horizon/pike/user/launch-instances.html
OpenStackRoleInstance Role = "instance"
+	// OpenStack document reference
+ // https://docs.openstack.org/openstacksdk/rocky/user/resources/load_balancer/index.html
+ OpenStackRoleLoadBalancer Role = "loadbalancer"
)
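+
+// A minimal scrape configuration sketch for the new role; the field names
+// follow the existing openstack_sd_config options and the values are purely
+// illustrative:
+//
+//	openstack_sd_configs:
+//	  - role: loadbalancer
+//	    region: RegionOne
+//	    identity_endpoint: https://keystone.example.org:5000/v3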
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -105,7 +108,7 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
switch *c {
- case OpenStackRoleHypervisor, OpenStackRoleInstance:
+ case OpenStackRoleHypervisor, OpenStackRoleInstance, OpenStackRoleLoadBalancer:
return nil
default:
return fmt.Errorf("unknown OpenStack SD role %q", *c)
@@ -128,7 +131,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
if c.Role == "" {
- return errors.New("role missing (one of: instance, hypervisor)")
+ return errors.New("role missing (one of: instance, hypervisor, loadbalancer)")
}
if c.Region == "" {
return errors.New("openstack SD configuration requires a region")
@@ -142,10 +145,10 @@ type refresher interface {
}
// NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*openstackMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
r, err := newRefresher(conf, l)
@@ -163,7 +166,7 @@ func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetr
), nil
}
-func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
+func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) {
var opts gophercloud.AuthOptions
if conf.IdentityEndpoint == "" {
var err error
@@ -211,6 +214,8 @@ func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
return newHypervisorDiscovery(client, &opts, conf.Port, conf.Region, availability, l), nil
case OpenStackRoleInstance:
return newInstanceDiscovery(client, &opts, conf.Port, conf.Region, conf.AllTenants, availability, l), nil
+ case OpenStackRoleLoadBalancer:
+ return newLoadBalancerDiscovery(client, &opts, conf.Region, availability, l), nil
}
return nil, errors.New("unknown OpenStack discovery role")
}
diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go
index a70857a08b..15bb9809c9 100644
--- a/discovery/ovhcloud/dedicated_server.go
+++ b/discovery/ovhcloud/dedicated_server.go
@@ -16,13 +16,12 @@ package ovhcloud
import (
"context"
"fmt"
+ "log/slog"
"net/netip"
"net/url"
"path"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/common/model"
@@ -55,10 +54,10 @@ type dedicatedServer struct {
type dedicatedServerDiscovery struct {
*refresh.Discovery
config *SDConfig
- logger log.Logger
+ logger *slog.Logger
}
-func newDedicatedServerDiscovery(conf *SDConfig, logger log.Logger) *dedicatedServerDiscovery {
+func newDedicatedServerDiscovery(conf *SDConfig, logger *slog.Logger) *dedicatedServerDiscovery {
return &dedicatedServerDiscovery{config: conf, logger: logger}
}
@@ -115,10 +114,7 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou
for _, dedicatedServerName := range dedicatedServerList {
dedicatedServer, err := getDedicatedServerDetails(client, dedicatedServerName)
if err != nil {
- err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error())
- if err != nil {
- return nil, err
- }
+ d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error())
continue
}
dedicatedServerDetailedList = append(dedicatedServerDetailedList, *dedicatedServer)
diff --git a/discovery/ovhcloud/dedicated_server_test.go b/discovery/ovhcloud/dedicated_server_test.go
index 52311bcc87..f9dbd6af9c 100644
--- a/discovery/ovhcloud/dedicated_server_test.go
+++ b/discovery/ovhcloud/dedicated_server_test.go
@@ -21,8 +21,8 @@ import (
"os"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
@@ -41,7 +41,7 @@ application_secret: %s
consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecretTest, ovhcloudConsumerKeyTest)
require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg))
- d, err := newRefresher(&cfg, log.NewNopLogger())
+ d, err := newRefresher(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
ctx := context.Background()
targetGroups, err := d.refresh(ctx)
diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go
index 988b4482f2..492bca603a 100644
--- a/discovery/ovhcloud/ovhcloud.go
+++ b/discovery/ovhcloud/ovhcloud.go
@@ -17,10 +17,10 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net/netip"
"time"
- "github.com/go-kit/log"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -54,7 +54,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &ovhcloudMetrics{
refreshMetrics: rmi,
}
@@ -137,7 +137,7 @@ func parseIPList(ipList []string) ([]netip.Addr, error) {
return ipAddresses, nil
}
-func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
+func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) {
switch conf.Service {
case "vps":
return newVpsDiscovery(conf, logger), nil
@@ -148,10 +148,10 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
}
// NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*ovhcloudMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
r, err := newRefresher(conf, logger)
diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go
index 9c95bf90e6..84a35af3ad 100644
--- a/discovery/ovhcloud/ovhcloud_test.go
+++ b/discovery/ovhcloud/ovhcloud_test.go
@@ -20,11 +20,11 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
- "github.com/prometheus/prometheus/util/testutil"
)
var (
@@ -121,7 +121,7 @@ func TestParseIPs(t *testing.T) {
func TestDiscoverer(t *testing.T) {
conf, _ := getMockConf("vps")
- logger := testutil.NewLogger(t)
+ logger := promslog.NewNopLogger()
reg := prometheus.NewRegistry()
refreshMetrics := discovery.NewRefreshMetrics(reg)
diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go
index 58ceeabd87..7050f826a5 100644
--- a/discovery/ovhcloud/vps.go
+++ b/discovery/ovhcloud/vps.go
@@ -16,13 +16,12 @@ package ovhcloud
import (
"context"
"fmt"
+ "log/slog"
"net/netip"
"net/url"
"path"
"strconv"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/common/model"
@@ -68,10 +67,10 @@ type virtualPrivateServer struct {
type vpsDiscovery struct {
*refresh.Discovery
config *SDConfig
- logger log.Logger
+ logger *slog.Logger
}
-func newVpsDiscovery(conf *SDConfig, logger log.Logger) *vpsDiscovery {
+func newVpsDiscovery(conf *SDConfig, logger *slog.Logger) *vpsDiscovery {
return &vpsDiscovery{config: conf, logger: logger}
}
@@ -133,10 +132,7 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) {
for _, vpsName := range vpsList {
vpsDetailed, err := getVpsDetails(client, vpsName)
if err != nil {
- err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error())
- if err != nil {
- return nil, err
- }
+ d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error())
continue
}
vpsDetailedList = append(vpsDetailedList, *vpsDetailed)
diff --git a/discovery/ovhcloud/vps_test.go b/discovery/ovhcloud/vps_test.go
index 2d2d6dcd21..d7a2a705c6 100644
--- a/discovery/ovhcloud/vps_test.go
+++ b/discovery/ovhcloud/vps_test.go
@@ -21,11 +21,10 @@ import (
"os"
"testing"
- yaml "gopkg.in/yaml.v2"
-
- "github.com/go-kit/log"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
+ yaml "gopkg.in/yaml.v2"
)
func TestOvhCloudVpsRefresh(t *testing.T) {
@@ -43,7 +42,7 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg))
- d, err := newRefresher(&cfg, log.NewNopLogger())
+ d, err := newRefresher(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
ctx := context.Background()
targetGroups, err := d.refresh(ctx)
diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go
index 8f89acbf93..e249bc4afa 100644
--- a/discovery/puppetdb/puppetdb.go
+++ b/discovery/puppetdb/puppetdb.go
@@ -17,8 +17,10 @@ import (
"bytes"
"context"
"encoding/json"
+ "errors"
"fmt"
"io"
+ "log/slog"
"net"
"net/http"
"net/url"
@@ -27,11 +29,11 @@ import (
"strings"
"time"
- "github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
@@ -62,7 +64,7 @@ var (
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
- userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+ userAgent = version.PrometheusUserAgent()
)
func init() {
@@ -80,7 +82,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &puppetdbMetrics{
refreshMetrics: rmi,
}
@@ -108,20 +110,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.URL == "" {
- return fmt.Errorf("URL is missing")
+ return errors.New("URL is missing")
}
parsedURL, err := url.Parse(c.URL)
if err != nil {
return err
}
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
- return fmt.Errorf("URL scheme must be 'http' or 'https'")
+ return errors.New("URL scheme must be 'http' or 'https'")
}
if parsedURL.Host == "" {
- return fmt.Errorf("host is missing in URL")
+ return errors.New("host is missing in URL")
}
if c.Query == "" {
- return fmt.Errorf("query missing")
+ return errors.New("query missing")
}
return c.HTTPClientConfig.Validate()
}
@@ -138,14 +140,14 @@ type Discovery struct {
}
// NewDiscovery returns a new PuppetDB discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*puppetdbMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http")
diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go
index bf9c7b215e..57e198e131 100644
--- a/discovery/puppetdb/puppetdb_test.go
+++ b/discovery/puppetdb/puppetdb_test.go
@@ -22,10 +22,10 @@ import (
"testing"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -70,7 +70,7 @@ func TestPuppetSlashInURL(t *testing.T) {
metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
require.NoError(t, metrics.Register())
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
require.Equal(t, apiURL, d.url)
@@ -94,7 +94,7 @@ func TestPuppetDBRefresh(t *testing.T) {
metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
require.NoError(t, metrics.Register())
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -142,7 +142,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) {
metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
require.NoError(t, metrics.Register())
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -184,7 +184,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) {
}
func TestPuppetDBInvalidCode(t *testing.T) {
- ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusBadRequest)
}))
@@ -201,7 +201,7 @@ func TestPuppetDBInvalidCode(t *testing.T) {
metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
require.NoError(t, metrics.Register())
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@@ -212,7 +212,7 @@ func TestPuppetDBInvalidCode(t *testing.T) {
}
func TestPuppetDBInvalidFormat(t *testing.T) {
- ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintln(w, "{}")
}))
@@ -229,7 +229,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) {
metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)
require.NoError(t, metrics.Register())
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go
index f037a90cff..31646c0e4c 100644
--- a/discovery/refresh/refresh.go
+++ b/discovery/refresh/refresh.go
@@ -16,17 +16,17 @@ package refresh
import (
"context"
"errors"
+ "log/slog"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
type Options struct {
- Logger log.Logger
+ Logger *slog.Logger
Mech string
Interval time.Duration
RefreshF func(ctx context.Context) ([]*targetgroup.Group, error)
@@ -35,7 +35,7 @@ type Options struct {
// Discovery implements the Discoverer interface.
type Discovery struct {
- logger log.Logger
+ logger *slog.Logger
interval time.Duration
refreshf func(ctx context.Context) ([]*targetgroup.Group, error)
metrics *discovery.RefreshMetrics
@@ -45,9 +45,9 @@ type Discovery struct {
func NewDiscovery(opts Options) *Discovery {
m := opts.MetricsInstantiator.Instantiate(opts.Mech)
- var logger log.Logger
+ var logger *slog.Logger
if opts.Logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
} else {
logger = opts.Logger
}
@@ -68,7 +68,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
tgs, err := d.refresh(ctx)
if err != nil {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
+ d.logger.Error("Unable to refresh target groups", "err", err.Error())
}
} else {
select {
@@ -87,7 +87,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
tgs, err := d.refresh(ctx)
if err != nil {
if !errors.Is(ctx.Err(), context.Canceled) {
- level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
+ d.logger.Error("Unable to refresh target groups", "err", err.Error())
}
continue
}
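For downstream service discoveries, wiring the refresh package keeps the same shape after the slog migration; a minimal sketch, with "example_sd", conf, r (a refresher) and m.refreshMetrics (a discovery.RefreshMetricsInstantiator) standing in for a provider's own names:

	// Illustrative only: how an SD provider constructs the slog-based refresher.
	return refresh.NewDiscovery(
		refresh.Options{
			Logger:              logger, // *slog.Logger; nil falls back to promslog.NewNopLogger()
			Mech:                "example_sd",
			Interval:            time.Duration(conf.RefreshInterval),
			RefreshF:            r.refresh,
			MetricsInstantiator: m.refreshMetrics,
		},
	), nil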
diff --git a/discovery/refresh/refresh_test.go b/discovery/refresh/refresh_test.go
index b70a326355..7c57d0532a 100644
--- a/discovery/refresh/refresh_test.go
+++ b/discovery/refresh/refresh_test.go
@@ -15,7 +15,7 @@ package refresh
import (
"context"
- "fmt"
+ "errors"
"testing"
"time"
@@ -56,7 +56,7 @@ func TestRefresh(t *testing.T) {
}
var i int
- refresh := func(ctx context.Context) ([]*targetgroup.Group, error) {
+ refresh := func(_ context.Context) ([]*targetgroup.Group, error) {
i++
switch i {
case 1:
@@ -64,7 +64,7 @@ func TestRefresh(t *testing.T) {
case 2:
return tg2, nil
}
- return nil, fmt.Errorf("some error")
+ return nil, errors.New("some error")
}
interval := time.Millisecond
diff --git a/discovery/registry.go b/discovery/registry.go
index 1f491d4ca9..93b88ccfab 100644
--- a/discovery/registry.go
+++ b/discovery/registry.go
@@ -22,9 +22,8 @@ import (
"strings"
"sync"
- "gopkg.in/yaml.v2"
-
"github.com/prometheus/client_golang/prometheus"
+ "gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@@ -267,7 +266,7 @@ func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error {
func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManager) (map[string]DiscovererMetrics, error) {
err := rmm.Register()
if err != nil {
- return nil, fmt.Errorf("failed to create service discovery refresh metrics")
+ return nil, errors.New("failed to create service discovery refresh metrics")
}
metrics := make(map[string]DiscovererMetrics)
@@ -275,7 +274,7 @@ func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManag
currentSdMetrics := conf.NewDiscovererMetrics(registerer, rmm)
err = currentSdMetrics.Register()
if err != nil {
- return nil, fmt.Errorf("failed to create service discovery metrics")
+ return nil, errors.New("failed to create service discovery metrics")
}
metrics[conf.Name()] = currentSdMetrics
}
diff --git a/discovery/scaleway/baremetal.go b/discovery/scaleway/baremetal.go
index c313e6695d..06f13532df 100644
--- a/discovery/scaleway/baremetal.go
+++ b/discovery/scaleway/baremetal.go
@@ -93,7 +93,7 @@ func newBaremetalDiscovery(conf *SDConfig) (*baremetalDiscovery, error) {
Transport: rt,
Timeout: time.Duration(conf.RefreshInterval),
}),
- scw.WithUserAgent(fmt.Sprintf("Prometheus/%s", version.Version)),
+ scw.WithUserAgent(version.PrometheusUserAgent()),
scw.WithProfile(profile),
)
if err != nil {
diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go
index 2542c63253..162a75e407 100644
--- a/discovery/scaleway/instance.go
+++ b/discovery/scaleway/instance.go
@@ -35,28 +35,30 @@ import (
const (
instanceLabelPrefix = metaLabelPrefix + "instance_"
- instanceBootTypeLabel = instanceLabelPrefix + "boot_type"
- instanceHostnameLabel = instanceLabelPrefix + "hostname"
- instanceIDLabel = instanceLabelPrefix + "id"
- instanceImageArchLabel = instanceLabelPrefix + "image_arch"
- instanceImageIDLabel = instanceLabelPrefix + "image_id"
- instanceImageNameLabel = instanceLabelPrefix + "image_name"
- instanceLocationClusterID = instanceLabelPrefix + "location_cluster_id"
- instanceLocationHypervisorID = instanceLabelPrefix + "location_hypervisor_id"
- instanceLocationNodeID = instanceLabelPrefix + "location_node_id"
- instanceNameLabel = instanceLabelPrefix + "name"
- instanceOrganizationLabel = instanceLabelPrefix + "organization_id"
- instancePrivateIPv4Label = instanceLabelPrefix + "private_ipv4"
- instanceProjectLabel = instanceLabelPrefix + "project_id"
- instancePublicIPv4Label = instanceLabelPrefix + "public_ipv4"
- instancePublicIPv6Label = instanceLabelPrefix + "public_ipv6"
- instanceSecurityGroupIDLabel = instanceLabelPrefix + "security_group_id"
- instanceSecurityGroupNameLabel = instanceLabelPrefix + "security_group_name"
- instanceStateLabel = instanceLabelPrefix + "status"
- instanceTagsLabel = instanceLabelPrefix + "tags"
- instanceTypeLabel = instanceLabelPrefix + "type"
- instanceZoneLabel = instanceLabelPrefix + "zone"
- instanceRegionLabel = instanceLabelPrefix + "region"
+ instanceBootTypeLabel = instanceLabelPrefix + "boot_type"
+ instanceHostnameLabel = instanceLabelPrefix + "hostname"
+ instanceIDLabel = instanceLabelPrefix + "id"
+ instanceImageArchLabel = instanceLabelPrefix + "image_arch"
+ instanceImageIDLabel = instanceLabelPrefix + "image_id"
+ instanceImageNameLabel = instanceLabelPrefix + "image_name"
+ instanceLocationClusterID = instanceLabelPrefix + "location_cluster_id"
+ instanceLocationHypervisorID = instanceLabelPrefix + "location_hypervisor_id"
+ instanceLocationNodeID = instanceLabelPrefix + "location_node_id"
+ instanceNameLabel = instanceLabelPrefix + "name"
+ instanceOrganizationLabel = instanceLabelPrefix + "organization_id"
+ instancePrivateIPv4Label = instanceLabelPrefix + "private_ipv4"
+ instanceProjectLabel = instanceLabelPrefix + "project_id"
+ instancePublicIPv4Label = instanceLabelPrefix + "public_ipv4"
+ instancePublicIPv6Label = instanceLabelPrefix + "public_ipv6"
+ instancePublicIPv4AddressesLabel = instanceLabelPrefix + "public_ipv4_addresses"
+ instancePublicIPv6AddressesLabel = instanceLabelPrefix + "public_ipv6_addresses"
+ instanceSecurityGroupIDLabel = instanceLabelPrefix + "security_group_id"
+ instanceSecurityGroupNameLabel = instanceLabelPrefix + "security_group_name"
+ instanceStateLabel = instanceLabelPrefix + "status"
+ instanceTagsLabel = instanceLabelPrefix + "tags"
+ instanceTypeLabel = instanceLabelPrefix + "type"
+ instanceZoneLabel = instanceLabelPrefix + "zone"
+ instanceRegionLabel = instanceLabelPrefix + "region"
)
type instanceDiscovery struct {
@@ -104,7 +106,7 @@ func newInstanceDiscovery(conf *SDConfig) (*instanceDiscovery, error) {
Transport: rt,
Timeout: time.Duration(conf.RefreshInterval),
}),
- scw.WithUserAgent(fmt.Sprintf("Prometheus/%s", version.Version)),
+ scw.WithUserAgent(version.PrometheusUserAgent()),
scw.WithProfile(profile),
)
if err != nil {
@@ -175,14 +177,43 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
}
addr := ""
+ if len(server.PublicIPs) > 0 {
+ var ipv4Addresses []string
+ var ipv6Addresses []string
+
+ for _, ip := range server.PublicIPs {
+ switch ip.Family {
+ case instance.ServerIPIPFamilyInet:
+ ipv4Addresses = append(ipv4Addresses, ip.Address.String())
+ case instance.ServerIPIPFamilyInet6:
+ ipv6Addresses = append(ipv6Addresses, ip.Address.String())
+ }
+ }
+
+ if len(ipv6Addresses) > 0 {
+ labels[instancePublicIPv6AddressesLabel] = model.LabelValue(
+ separator +
+ strings.Join(ipv6Addresses, separator) +
+ separator)
+ }
+ if len(ipv4Addresses) > 0 {
+ labels[instancePublicIPv4AddressesLabel] = model.LabelValue(
+ separator +
+ strings.Join(ipv4Addresses, separator) +
+ separator)
+ }
+ }
+
if server.IPv6 != nil { //nolint:staticcheck
labels[instancePublicIPv6Label] = model.LabelValue(server.IPv6.Address.String()) //nolint:staticcheck
addr = server.IPv6.Address.String() //nolint:staticcheck
}
if server.PublicIP != nil { //nolint:staticcheck
- labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) //nolint:staticcheck
- addr = server.PublicIP.Address.String() //nolint:staticcheck
+ if server.PublicIP.Family != instance.ServerIPIPFamilyInet6 { //nolint:staticcheck
+ labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) //nolint:staticcheck
+ }
+ addr = server.PublicIP.Address.String() //nolint:staticcheck
}
if server.PrivateIP != nil {
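The new public_ipv4_addresses and public_ipv6_addresses labels carry every public IP in a single value, joined and wrapped with the SD separator (a comma, as the updated test fixture below shows); a small illustrative sketch of reading such a value back:

	// Illustrative only: splitting a wrapped, comma-joined label value
	// back into individual addresses.
	raw := ",163.172.136.10,212.47.248.223,51.15.231.134,"
	addrs := strings.Split(strings.Trim(raw, ","), ",")
	// addrs == []string{"163.172.136.10", "212.47.248.223", "51.15.231.134"}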
diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go
index ae70a9ed25..11ef36d353 100644
--- a/discovery/scaleway/instance_test.go
+++ b/discovery/scaleway/instance_test.go
@@ -60,7 +60,7 @@ api_url: %s
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Len(t, tg.Targets, 3)
+ require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
@@ -125,6 +125,8 @@ api_url: %s
"__meta_scaleway_instance_organization_id": "20b3d507-96ac-454c-a795-bc731b46b12f",
"__meta_scaleway_instance_project_id": "20b3d507-96ac-454c-a795-bc731b46b12f",
"__meta_scaleway_instance_public_ipv4": "51.158.183.115",
+ "__meta_scaleway_instance_public_ipv4_addresses": ",51.158.183.115,",
+ "__meta_scaleway_instance_public_ipv6_addresses": ",2001:bc8:1640:1568:dc00:ff:fe21:91b,",
"__meta_scaleway_instance_region": "nl-ams",
"__meta_scaleway_instance_security_group_id": "984414da-9fc2-49c0-a925-fed6266fe092",
"__meta_scaleway_instance_security_group_name": "Default security group",
@@ -132,6 +134,30 @@ api_url: %s
"__meta_scaleway_instance_type": "DEV1-S",
"__meta_scaleway_instance_zone": "nl-ams-1",
},
+ {
+ "__address__": "163.172.136.10:80",
+ "__meta_scaleway_instance_boot_type": "local",
+ "__meta_scaleway_instance_hostname": "multiple-ips",
+ "__meta_scaleway_instance_id": "658abbf4-e6c6-4239-a483-3307763cf6e0",
+ "__meta_scaleway_instance_image_arch": "x86_64",
+ "__meta_scaleway_instance_image_id": "f583f58c-1ea5-44ab-a1e6-2b2e7df32a86",
+ "__meta_scaleway_instance_image_name": "Ubuntu 24.04 Noble Numbat",
+ "__meta_scaleway_instance_location_cluster_id": "7",
+ "__meta_scaleway_instance_location_hypervisor_id": "801",
+ "__meta_scaleway_instance_location_node_id": "95",
+ "__meta_scaleway_instance_name": "multiple-ips",
+ "__meta_scaleway_instance_organization_id": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "__meta_scaleway_instance_project_id": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "__meta_scaleway_instance_public_ipv4": "163.172.136.10",
+ "__meta_scaleway_instance_public_ipv4_addresses": ",163.172.136.10,212.47.248.223,51.15.231.134,",
+ "__meta_scaleway_instance_public_ipv6_addresses": ",2001:bc8:710:4a69:dc00:ff:fe58:40c1,2001:bc8:710:d::,2001:bc8:710:5417::,",
+ "__meta_scaleway_instance_region": "fr-par",
+ "__meta_scaleway_instance_security_group_id": "0fe819c3-274d-472a-b3f5-ddb258d2d8bb",
+ "__meta_scaleway_instance_security_group_name": "Default security group",
+ "__meta_scaleway_instance_status": "running",
+ "__meta_scaleway_instance_type": "PLAY2-PICO",
+ "__meta_scaleway_instance_zone": "fr-par-1",
+ },
} {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
require.Equal(t, lbls, tg.Targets[i])
diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go
index f8e1a83f5e..47ac092000 100644
--- a/discovery/scaleway/scaleway.go
+++ b/discovery/scaleway/scaleway.go
@@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net/http"
"os"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -105,7 +105,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &scalewayMetrics{
refreshMetrics: rmi,
}
@@ -185,10 +185,10 @@ func init() {
// the Discoverer interface.
type Discovery struct{}
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*scalewayMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
r, err := newRefresher(conf)
diff --git a/discovery/scaleway/testdata/instance.json b/discovery/scaleway/testdata/instance.json
index b433f7598e..19a6106daa 100644
--- a/discovery/scaleway/testdata/instance.json
+++ b/discovery/scaleway/testdata/instance.json
@@ -356,6 +356,222 @@
"placement_group": null,
"private_nics": [],
"zone": "nl-ams-1"
+ },
+ {
+ "id": "658abbf4-e6c6-4239-a483-3307763cf6e0",
+ "name": "multiple-ips",
+ "arch": "x86_64",
+ "commercial_type": "PLAY2-PICO",
+ "boot_type": "local",
+ "organization": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "project": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "hostname": "multiple-ips",
+ "image": {
+ "id": "f583f58c-1ea5-44ab-a1e6-2b2e7df32a86",
+ "name": "Ubuntu 24.04 Noble Numbat",
+ "organization": "51b656e3-4865-41e8-adbc-0c45bdd780db",
+ "project": "51b656e3-4865-41e8-adbc-0c45bdd780db",
+ "root_volume": {
+ "id": "cfab1e2e-fa24-480a-a372-61b19e8e2fda",
+ "name": "Ubuntu 24.04 Noble Numbat",
+ "volume_type": "unified",
+ "size": 10000000000
+ },
+ "extra_volumes": {
+
+ },
+ "public": true,
+ "arch": "x86_64",
+ "creation_date": "2024-04-26T09:24:38.624912+00:00",
+ "modification_date": "2024-04-26T09:24:38.624912+00:00",
+ "default_bootscript": null,
+ "from_server": "",
+ "state": "available",
+ "tags": [
+
+ ],
+ "zone": "fr-par-1"
+ },
+ "volumes": {
+ "0": {
+ "boot": false,
+ "id": "7d4dc5ae-3f3c-4f9c-91cf-4a47a2193e94",
+ "name": "Ubuntu 24.04 Noble Numbat",
+ "volume_type": "b_ssd",
+ "export_uri": null,
+ "organization": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "project": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b",
+ "server": {
+ "id": "658abbf4-e6c6-4239-a483-3307763cf6e0",
+ "name": "multiple-ips"
+ },
+ "size": 10000000000,
+ "state": "available",
+ "creation_date": "2024-06-07T13:33:17.697162+00:00",
+ "modification_date": "2024-06-07T13:33:17.697162+00:00",
+ "tags": [
+
+ ],
+ "zone": "fr-par-1"
+ }
+ },
+ "tags": [
+
+ ],
+ "state": "running",
+ "protected": false,
+ "state_detail": "booted",
+ "public_ip": {
+ "id": "63fd9ede-58d7-482c-b21d-1d79d81a90dc",
+ "address": "163.172.136.10",
+ "dynamic": false,
+ "gateway": "62.210.0.1",
+ "netmask": "32",
+ "family": "inet",
+ "provisioning_mode": "dhcp",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "700644ed-f6a2-4c64-8508-bb867bc07673"
+ },
+ "public_ips": [
+ {
+ "id": "63fd9ede-58d7-482c-b21d-1d79d81a90dc",
+ "address": "163.172.136.10",
+ "dynamic": false,
+ "gateway": "62.210.0.1",
+ "netmask": "32",
+ "family": "inet",
+ "provisioning_mode": "dhcp",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "700644ed-f6a2-4c64-8508-bb867bc07673"
+ },
+ {
+ "id": "eed4575b-90e5-4102-b956-df874c911e2b",
+ "address": "212.47.248.223",
+ "dynamic": false,
+ "gateway": "62.210.0.1",
+ "netmask": "32",
+ "family": "inet",
+ "provisioning_mode": "manual",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "e2bdef64-828b-4f4a-a56b-954a85759adf"
+ },
+ {
+ "id": "fca8b329-6c0e-4f9c-aa88-d5c197b5919a",
+ "address": "51.15.231.134",
+ "dynamic": false,
+ "gateway": "62.210.0.1",
+ "netmask": "32",
+ "family": "inet",
+ "provisioning_mode": "manual",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "e56808db-b348-4b7e-ad23-995ae08dc1a1"
+ },
+ {
+ "id": "3ffa6774-124c-4e64-8afb-148c15304b25",
+ "address": "2001:bc8:710:4a69:dc00:ff:fe58:40c1",
+ "dynamic": false,
+ "gateway": "fe80::dc00:ff:fe58:40c2",
+ "netmask": "64",
+ "family": "inet6",
+ "provisioning_mode": "slaac",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "d97773d9-fd2c-4085-92bb-5c471abd132e"
+ },
+ {
+ "id": "28fcf539-8492-4603-b627-de0501ce8489",
+ "address": "2001:bc8:710:d::",
+ "dynamic": false,
+ "gateway": "fe80::dc00:ff:fe58:40c2",
+ "netmask": "64",
+ "family": "inet6",
+ "provisioning_mode": "manual",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "005d19ac-203c-4034-ab16-9ff4caadbdd5"
+ },
+ {
+ "id": "db6fafda-3a12-403d-8c9c-1a1cb1c315ba",
+ "address": "2001:bc8:710:5417::",
+ "dynamic": false,
+ "gateway": "fe80::dc00:ff:fe58:40c2",
+ "netmask": "64",
+ "family": "inet6",
+ "provisioning_mode": "manual",
+ "tags": [
+
+ ],
+ "state": "attached",
+ "ipam_id": "8c62fac8-4134-462c-ba48-71ce4f8ac939"
+ }
+ ],
+ "mac_address": "de:00:00:58:40:c1",
+ "routed_ip_enabled": true,
+ "ipv6": null,
+ "extra_networks": [
+
+ ],
+ "dynamic_ip_required": false,
+ "enable_ipv6": false,
+ "private_ip": null,
+ "creation_date": "2024-06-07T13:33:17.697162+00:00",
+ "modification_date": "2024-06-07T13:33:26.021167+00:00",
+ "bootscript": {
+ "id": "fdfe150f-a870-4ce4-b432-9f56b5b995c1",
+ "public": true,
+ "title": "x86_64 mainline 4.4.230 rev1",
+ "architecture": "x86_64",
+ "organization": "11111111-1111-4111-8111-111111111111",
+ "project": "11111111-1111-4111-8111-111111111111",
+ "kernel": "http://10.194.3.9/kernel/x86_64-mainline-lts-4.4-4.4.230-rev1/vmlinuz-4.4.230",
+ "dtb": "",
+ "initrd": "http://10.194.3.9/initrd/initrd-Linux-x86_64-v3.14.6.gz",
+ "bootcmdargs": "LINUX_COMMON scaleway boot=local nbd.max_part=16",
+ "default": true,
+ "zone": "fr-par-1"
+ },
+ "security_group": {
+ "id": "0fe819c3-274d-472a-b3f5-ddb258d2d8bb",
+ "name": "Default security group"
+ },
+ "location": {
+ "zone_id": "par1",
+ "platform_id": "14",
+ "cluster_id": "7",
+ "hypervisor_id": "801",
+ "node_id": "95"
+ },
+ "maintenances": [
+
+ ],
+ "allowed_actions": [
+ "poweroff",
+ "terminate",
+ "reboot",
+ "stop_in_place",
+ "backup"
+ ],
+ "placement_group": null,
+ "private_nics": [
+
+ ],
+ "zone": "fr-par-1"
}
]
}
\ No newline at end of file
diff --git a/util/logging/ratelimit.go b/discovery/stackit/metrics.go
similarity index 54%
rename from util/logging/ratelimit.go
rename to discovery/stackit/metrics.go
index 32d1e249e6..4143b144b7 100644
--- a/util/logging/ratelimit.go
+++ b/discovery/stackit/metrics.go
@@ -1,4 +1,4 @@
-// Copyright 2019 The Prometheus Authors
+// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -11,29 +11,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package logging
+package stackit
import (
- "github.com/go-kit/log"
- "golang.org/x/time/rate"
+ "github.com/prometheus/prometheus/discovery"
)
-type ratelimiter struct {
- limiter *rate.Limiter
- next log.Logger
+var _ discovery.DiscovererMetrics = (*stackitMetrics)(nil)
+
+type stackitMetrics struct {
+ refreshMetrics discovery.RefreshMetricsInstantiator
}
-// RateLimit write to a logger.
-func RateLimit(next log.Logger, limit rate.Limit) log.Logger {
- return &ratelimiter{
- limiter: rate.NewLimiter(limit, int(limit)),
- next: next,
- }
-}
-
-func (r *ratelimiter) Log(keyvals ...interface{}) error {
- if r.limiter.Allow() {
- return r.next.Log(keyvals...)
- }
+// Register implements discovery.DiscovererMetrics.
+func (m *stackitMetrics) Register() error {
return nil
}
+
+// Unregister implements discovery.DiscovererMetrics.
+func (m *stackitMetrics) Unregister() {}
diff --git a/discovery/stackit/mock_test.go b/discovery/stackit/mock_test.go
new file mode 100644
index 0000000000..59641ce2bc
--- /dev/null
+++ b/discovery/stackit/mock_test.go
@@ -0,0 +1,162 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stackit
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+)
+
+// SDMock is the mock server for the STACKIT IAAS API.
+type SDMock struct {
+ t *testing.T
+ Server *httptest.Server
+ Mux *http.ServeMux
+}
+
+// NewSDMock returns a new SDMock.
+func NewSDMock(t *testing.T) *SDMock {
+ return &SDMock{
+ t: t,
+ }
+}
+
+// Endpoint returns the URI to the mock server.
+func (m *SDMock) Endpoint() string {
+ return m.Server.URL + "/"
+}
+
+// Setup creates the mock server.
+func (m *SDMock) Setup() {
+ m.Mux = http.NewServeMux()
+ m.Server = httptest.NewServer(m.Mux)
+ m.t.Cleanup(m.Server.Close)
+}
+
+// ShutdownServer shuts down the mock server.
+func (m *SDMock) ShutdownServer() {
+ m.Server.Close()
+}
+
+const (
+ testToken = "LRK9DAWQ1ZAEFSrCNEEzLCUwhYX1U3g7wMg4dTlkkDC96fyDuyJ39nVbVjCKSDfj"
+ testProjectID = "00000000-0000-0000-0000-000000000000"
+)
+
+// HandleServers mocks the STACKIT IAAS API.
+func (m *SDMock) HandleServers() {
+ // /token endpoint mocks the token endpoint for service account authentication.
+ // It checks if the request body starts with "assertion=ey" to simulate a valid assertion
+ // as defined in RFC 7523.
+ m.Mux.HandleFunc("/token", func(w http.ResponseWriter, r *http.Request) {
+ reqBody, err := io.ReadAll(r.Body)
+ if err != nil {
+ w.WriteHeader(http.StatusInternalServerError)
+ _, _ = fmt.Fprint(w, err)
+ return
+ }
+
+ // Expecting HTTP form encoded body with the field assertion.
+ // JWTs always start with "ey" (base64url encoded).
+ if !bytes.HasPrefix(reqBody, []byte("assertion=ey")) {
+ w.WriteHeader(http.StatusUnauthorized)
+ return
+ }
+
+ w.Header().Add("content-type", "application/json; charset=utf-8")
+ w.WriteHeader(http.StatusOK)
+
+ _, _ = fmt.Fprintf(w, `{"access_token": "%s"}`, testToken)
+ })
+
+ m.Mux.HandleFunc(fmt.Sprintf("/v1/projects/%s/servers", testProjectID), func(w http.ResponseWriter, r *http.Request) {
+ if r.Header.Get("Authorization") != fmt.Sprintf("Bearer %s", testToken) {
+ w.WriteHeader(http.StatusUnauthorized)
+ return
+ }
+
+ w.Header().Add("content-type", "application/json; charset=utf-8")
+ w.WriteHeader(http.StatusOK)
+
+ _, _ = fmt.Fprint(w, `
+{
+ "items": [
+ {
+ "availabilityZone": "eu01-3",
+ "bootVolume": {
+ "deleteOnTermination": false,
+ "id": "1c15e4cc-8474-46be-b875-b473ea9fe80c"
+ },
+ "createdAt": "2025-03-12T14:48:17Z",
+ "id": "b4176700-596a-4f80-9fc8-5f9c58a606e1",
+ "labels": {
+ "provisionSTACKITServerAgent": "true",
+ "stackit_project_id": "00000000-0000-0000-0000-000000000000"
+ },
+ "launchedAt": "2025-03-12T14:48:52Z",
+ "machineType": "g1.1",
+ "name": "runcommandtest",
+ "nics": [
+ {
+ "ipv4": "10.0.0.153",
+ "mac": "fa:16:4f:42:1c:d3",
+ "networkId": "3173494f-2f6c-490d-8c12-4b3c86b4338b",
+ "networkName": "test",
+ "publicIp": "192.0.2.1",
+ "nicId": "b36097c5-e1c5-4e12-ae97-c03e144db127",
+ "nicSecurity": true,
+ "securityGroups": [
+ "6e60809f-bed3-46c6-a39c-adddd6455674"
+ ]
+ }
+ ],
+ "powerStatus": "STOPPED",
+ "serviceAccountMails": [],
+ "status": "INACTIVE",
+ "updatedAt": "2025-03-13T07:08:29Z",
+ "userData": null,
+ "volumes": [
+ "1c15e4cc-8474-46be-b875-b473ea9fe80c"
+ ]
+ },
+ {
+ "availabilityZone": "eu01-m",
+ "bootVolume": {
+ "deleteOnTermination": false,
+ "id": "1e3ffe2b-878f-46e5-b39e-372e13a09551"
+ },
+ "createdAt": "2025-04-10T16:45:25Z",
+ "id": "ee337436-1f15-4647-a03e-154009966179",
+ "labels": {},
+ "launchedAt": "2025-04-10T16:46:00Z",
+ "machineType": "t1.1",
+ "name": "server1",
+ "nics": [],
+ "powerStatus": "RUNNING",
+ "serviceAccountMails": [],
+ "status": "ACTIVE",
+ "updatedAt": "2025-04-10T16:46:00Z",
+ "volumes": [
+ "1e3ffe2b-878f-46e5-b39e-372e13a09551"
+ ]
+ }
+ ]
+}`,
+ )
+ })
+}
diff --git a/discovery/stackit/server.go b/discovery/stackit/server.go
new file mode 100644
index 0000000000..1be834a689
--- /dev/null
+++ b/discovery/stackit/server.go
@@ -0,0 +1,222 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stackit
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log/slog"
+ "net"
+ "net/http"
+ "net/url"
+ "strconv"
+ "time"
+
+ "github.com/prometheus/common/config"
+ "github.com/prometheus/common/model"
+ "github.com/stackitcloud/stackit-sdk-go/core/auth"
+ stackitconfig "github.com/stackitcloud/stackit-sdk-go/core/config"
+
+ "github.com/prometheus/prometheus/discovery/refresh"
+ "github.com/prometheus/prometheus/discovery/targetgroup"
+ "github.com/prometheus/prometheus/util/strutil"
+)
+
+const (
+ stackitAPIEndpoint = "https://iaas.api.%s.stackit.cloud"
+
+ stackitLabelPrivateIPv4 = stackitLabelPrefix + "private_ipv4_"
+ stackitLabelType = stackitLabelPrefix + "type"
+ stackitLabelLabel = stackitLabelPrefix + "label_"
+ stackitLabelLabelPresent = stackitLabelPrefix + "labelpresent_"
+)
+
+// iaasDiscovery periodically performs STACKIT Cloud requests.
+// It implements the refresher interface.
+type iaasDiscovery struct {
+ *refresh.Discovery
+ httpClient *http.Client
+ logger *slog.Logger
+ apiEndpoint string
+ project string
+ port int
+}
+
+// newServerDiscovery returns a new iaasDiscovery, which periodically refreshes its targets.
+func newServerDiscovery(conf *SDConfig, logger *slog.Logger) (*iaasDiscovery, error) {
+ d := &iaasDiscovery{
+ project: conf.Project,
+ port: conf.Port,
+ apiEndpoint: conf.Endpoint,
+ logger: logger,
+ }
+
+ rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "stackit_sd")
+ if err != nil {
+ return nil, err
+ }
+
+ d.apiEndpoint = conf.Endpoint
+ if d.apiEndpoint == "" {
+ d.apiEndpoint = fmt.Sprintf(stackitAPIEndpoint, conf.Region)
+ }
+
+ servers := stackitconfig.ServerConfigurations{stackitconfig.ServerConfiguration{
+ URL: d.apiEndpoint,
+ Description: "STACKIT IAAS API",
+ }}
+
+ d.httpClient = &http.Client{
+ Timeout: time.Duration(conf.RefreshInterval),
+ Transport: rt,
+ }
+
+ stackitConfiguration := &stackitconfig.Configuration{
+ UserAgent: userAgent,
+ HTTPClient: d.httpClient,
+ Servers: servers,
+ NoAuth: conf.ServiceAccountKey == "" && conf.ServiceAccountKeyPath == "",
+
+ ServiceAccountKey: conf.ServiceAccountKey,
+ PrivateKey: conf.PrivateKey,
+ ServiceAccountKeyPath: conf.ServiceAccountKeyPath,
+ PrivateKeyPath: conf.PrivateKeyPath,
+ CredentialsFilePath: conf.CredentialsFilePath,
+ }
+
+ if conf.tokenURL != "" {
+ stackitConfiguration.TokenCustomUrl = conf.tokenURL
+ }
+
+ authRoundTripper, err := auth.SetupAuth(stackitConfiguration)
+ if err != nil {
+ return nil, fmt.Errorf("setting up authentication: %w", err)
+ }
+
+ d.httpClient.Transport = authRoundTripper
+
+ return d, nil
+}
+
+func (i *iaasDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+ apiURL, err := url.Parse(i.apiEndpoint)
+ if err != nil {
+ return nil, fmt.Errorf("invalid API endpoint URL %s: %w", i.apiEndpoint, err)
+ }
+
+ apiURL.Path, err = url.JoinPath(apiURL.Path, "v1", "projects", i.project, "servers")
+ if err != nil {
+ return nil, fmt.Errorf("joining URL path: %w", err)
+ }
+
+ q := apiURL.Query()
+ q.Set("details", "true")
+ apiURL.RawQuery = q.Encode()
+
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL.String(), nil)
+ if err != nil {
+ return nil, fmt.Errorf("creating request: %w", err)
+ }
+
+ req.Header.Set("Accept", "application/json")
+
+ res, err := i.httpClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("sending request: %w", err)
+ }
+
+ defer res.Body.Close()
+
+ if res.StatusCode != http.StatusOK {
+ errorMessage, _ := io.ReadAll(res.Body)
+
+ return nil, fmt.Errorf("unexpected status code %d: %s", res.StatusCode, string(errorMessage))
+ }
+
+ var serversResponse *ServerListResponse
+
+ if err := json.NewDecoder(res.Body).Decode(&serversResponse); err != nil {
+ return nil, fmt.Errorf("decoding response: %w", err)
+ }
+
+ if serversResponse == nil || serversResponse.Items == nil || len(*serversResponse.Items) == 0 {
+ return []*targetgroup.Group{{Source: "stackit", Targets: []model.LabelSet{}}}, nil
+ }
+
+ targets := make([]model.LabelSet, 0, len(*serversResponse.Items))
+ for _, server := range *serversResponse.Items {
+ if server.Nics == nil {
+ i.logger.Debug("server has no network interfaces. Skipping", slog.String("server_id", server.ID))
+ continue
+ }
+
+ labels := model.LabelSet{
+ stackitLabelProject: model.LabelValue(i.project),
+ stackitLabelID: model.LabelValue(server.ID),
+ stackitLabelName: model.LabelValue(server.Name),
+ stackitLabelAvailabilityZone: model.LabelValue(server.AvailabilityZone),
+ stackitLabelStatus: model.LabelValue(server.Status),
+ stackitLabelPowerStatus: model.LabelValue(server.PowerStatus),
+ stackitLabelType: model.LabelValue(server.MachineType),
+ }
+
+ var (
+ addressLabel string
+ serverPublicIP string
+ )
+
+ for _, nic := range server.Nics {
+ if nic.PublicIP != nil && *nic.PublicIP != "" && serverPublicIP == "" {
+ serverPublicIP = *nic.PublicIP
+ addressLabel = serverPublicIP
+ }
+
+ if nic.IPv4 != nil && *nic.IPv4 != "" {
+ networkLabel := model.LabelName(stackitLabelPrivateIPv4 + strutil.SanitizeLabelName(nic.NetworkName))
+ labels[networkLabel] = model.LabelValue(*nic.IPv4)
+ if addressLabel == "" {
+ addressLabel = *nic.IPv4
+ }
+ }
+ }
+
+ if addressLabel == "" {
+ // Skip servers without IPs.
+ continue
+ }
+
+ // Public IPs for servers are optional.
+ if serverPublicIP != "" {
+ labels[stackitLabelPublicIPv4] = model.LabelValue(serverPublicIP)
+ }
+
+ labels[model.AddressLabel] = model.LabelValue(net.JoinHostPort(addressLabel, strconv.FormatUint(uint64(i.port), 10)))
+
+ for labelKey, labelValue := range server.Labels {
+ if labelStringValue, ok := labelValue.(string); ok {
+ presentLabel := model.LabelName(stackitLabelLabelPresent + strutil.SanitizeLabelName(labelKey))
+ labels[presentLabel] = "true"
+
+ label := model.LabelName(stackitLabelLabel + strutil.SanitizeLabelName(labelKey))
+ labels[label] = model.LabelValue(labelStringValue)
+ }
+ }
+
+ targets = append(targets, labels)
+ }
+
+ return []*targetgroup.Group{{Source: "stackit", Targets: targets}}, nil
+}
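Each NIC contributes a per-network private IPv4 label whose name is derived from the sanitized network name; a short sketch, assuming a hypothetical network called "internal-net":

	// Illustrative only: label name construction for a NIC's private IPv4.
	name := model.LabelName(stackitLabelPrivateIPv4 + strutil.SanitizeLabelName("internal-net"))
	// name == "__meta_stackit_private_ipv4_internal_net"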
diff --git a/discovery/stackit/server_test.go b/discovery/stackit/server_test.go
new file mode 100644
index 0000000000..117fbdd66d
--- /dev/null
+++ b/discovery/stackit/server_test.go
@@ -0,0 +1,131 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stackit
+
+import (
+ "context"
+ "crypto/rand"
+ "crypto/rsa"
+ "crypto/x509"
+ "encoding/pem"
+ "testing"
+
+ "github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
+ "github.com/stretchr/testify/require"
+)
+
+type serverSDTestSuite struct {
+ Mock *SDMock
+}
+
+func (s *serverSDTestSuite) SetupTest(t *testing.T) {
+ s.Mock = NewSDMock(t)
+ s.Mock.Setup()
+
+ s.Mock.HandleServers()
+}
+
+func TestServerSDRefresh(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ cfg SDConfig
+ }{
+ {
+ name: "default with token",
+ cfg: func() SDConfig {
+ cfg := DefaultSDConfig
+ cfg.HTTPClientConfig.BearerToken = testToken
+
+ return cfg
+ }(),
+ },
+ {
+ name: "default with service account key",
+ cfg: func() SDConfig {
+ // Generate a new RSA key pair with a size of 2048 bits
+ key, err := rsa.GenerateKey(rand.Reader, 2048)
+ require.NoError(t, err)
+
+ cfg := DefaultSDConfig
+ cfg.PrivateKey = string(pem.EncodeToMemory(&pem.Block{
+ Type: "RSA PRIVATE KEY",
+ Bytes: x509.MarshalPKCS1PrivateKey(key),
+ }))
+
+ cfg.ServiceAccountKey = `{
+ "Active": true,
+ "CreatedAt": "2025-04-05T12:34:56Z",
+ "Credentials": {
+ "Aud": "https://stackit-service-account-prod.apps.01.cf.eu01.stackit.cloud",
+ "Iss": "stackit@sa.stackit.cloud",
+ "Kid": "123e4567-e89b-12d3-a456-426614174000",
+ "Sub": "123e4567-e89b-12d3-a456-426614174001"
+ },
+ "ID": "123e4567-e89b-12d3-a456-426614174002",
+ "KeyAlgorithm": "RSA_2048",
+ "KeyOrigin": "USER_PROVIDED",
+ "KeyType": "USER_MANAGED",
+ "PublicKey": "...",
+ "ValidUntil": "2025-04-05T13:34:56Z"
+}`
+
+ return cfg
+ }(),
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ suite := &serverSDTestSuite{}
+ suite.SetupTest(t)
+ defer suite.Mock.ShutdownServer()
+
+ tc.cfg.Endpoint = suite.Mock.Endpoint()
+ tc.cfg.tokenURL = suite.Mock.Endpoint() + "token"
+ tc.cfg.Project = testProjectID
+
+ d, err := newServerDiscovery(&tc.cfg, promslog.NewNopLogger())
+ require.NoError(t, err)
+
+ targetGroups, err := d.refresh(context.Background())
+ require.NoError(t, err)
+ require.Len(t, targetGroups, 1)
+
+ targetGroup := targetGroups[0]
+ require.NotNil(t, targetGroup, "targetGroup should not be nil")
+ require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
+ require.Len(t, targetGroup.Targets, 1)
+
+ for i, labelSet := range []model.LabelSet{
+ {
+ "__address__": model.LabelValue("192.0.2.1:80"),
+ "__meta_stackit_project": model.LabelValue("00000000-0000-0000-0000-000000000000"),
+ "__meta_stackit_id": model.LabelValue("b4176700-596a-4f80-9fc8-5f9c58a606e1"),
+ "__meta_stackit_type": model.LabelValue("g1.1"),
+ "__meta_stackit_private_ipv4_test": model.LabelValue("10.0.0.153"),
+ "__meta_stackit_public_ipv4": model.LabelValue("192.0.2.1"),
+ "__meta_stackit_labelpresent_provisionSTACKITServerAgent": model.LabelValue("true"),
+ "__meta_stackit_label_provisionSTACKITServerAgent": model.LabelValue("true"),
+ "__meta_stackit_labelpresent_stackit_project_id": model.LabelValue("true"),
+ "__meta_stackit_name": model.LabelValue("runcommandtest"),
+ "__meta_stackit_availability_zone": model.LabelValue("eu01-3"),
+ "__meta_stackit_status": model.LabelValue("INACTIVE"),
+ "__meta_stackit_power_status": model.LabelValue("STOPPED"),
+ "__meta_stackit_label_stackit_project_id": model.LabelValue("00000000-0000-0000-0000-000000000000"),
+ },
+ } {
+ require.Equal(t, labelSet, targetGroup.Targets[i])
+ }
+ })
+ }
+}
diff --git a/discovery/stackit/stackit.go b/discovery/stackit/stackit.go
new file mode 100644
index 0000000000..030f2bdb55
--- /dev/null
+++ b/discovery/stackit/stackit.go
@@ -0,0 +1,153 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stackit
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "log/slog"
+ "net/url"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/config"
+ "github.com/prometheus/common/model"
+ "github.com/prometheus/common/version"
+
+ "github.com/prometheus/prometheus/discovery"
+ "github.com/prometheus/prometheus/discovery/refresh"
+ "github.com/prometheus/prometheus/discovery/targetgroup"
+)
+
+const (
+ stackitLabelPrefix = model.MetaLabelPrefix + "stackit_"
+ stackitLabelProject = stackitLabelPrefix + "project"
+ stackitLabelID = stackitLabelPrefix + "id"
+ stackitLabelName = stackitLabelPrefix + "name"
+ stackitLabelStatus = stackitLabelPrefix + "status"
+ stackitLabelPowerStatus = stackitLabelPrefix + "power_status"
+ stackitLabelAvailabilityZone = stackitLabelPrefix + "availability_zone"
+ stackitLabelPublicIPv4 = stackitLabelPrefix + "public_ipv4"
+)
+
+var userAgent = version.PrometheusUserAgent()
+
+// DefaultSDConfig is the default STACKIT SD configuration.
+var DefaultSDConfig = SDConfig{
+ Region: "eu01",
+ Port: 80,
+ RefreshInterval: model.Duration(60 * time.Second),
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+}
+
+func init() {
+ discovery.RegisterConfig(&SDConfig{})
+}
+
+// SDConfig is the configuration for STACKIT based service discovery.
+type SDConfig struct {
+ HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
+
+ Project string `yaml:"project"`
+ RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
+ Port int `yaml:"port,omitempty"`
+ Region string `yaml:"region,omitempty"`
+ Endpoint string `yaml:"endpoint,omitempty"`
+ ServiceAccountKey string `yaml:"service_account_key,omitempty"`
+ PrivateKey string `yaml:"private_key,omitempty"`
+ ServiceAccountKeyPath string `yaml:"service_account_key_path,omitempty"`
+ PrivateKeyPath string `yaml:"private_key_path,omitempty"`
+ CredentialsFilePath string `yaml:"credentials_file_path,omitempty"`
+
+ // For testing only
+ tokenURL string
+}
+
+// NewDiscovererMetrics implements discovery.Config.
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+ return &stackitMetrics{
+ refreshMetrics: rmi,
+ }
+}
+
+// Name returns the name of the Config.
+func (*SDConfig) Name() string { return "stackit" }
+
+// NewDiscoverer returns a Discoverer for the Config.
+func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
+ return NewDiscovery(c, opts.Logger, opts.Metrics)
+}
+
+type refresher interface {
+ refresh(context.Context) ([]*targetgroup.Group, error)
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface.
+func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
+ *c = DefaultSDConfig
+ type plain SDConfig
+ err := unmarshal((*plain)(c))
+ if err != nil {
+ return err
+ }
+
+ if c.Endpoint == "" && c.Region == "" {
+ return errors.New("stackit_sd: endpoint and region missing")
+ }
+
+ if _, err = url.Parse(c.Endpoint); err != nil {
+ return fmt.Errorf("stackit_sd: invalid endpoint %q: %w", c.Endpoint, err)
+ }
+
+ return c.HTTPClientConfig.Validate()
+}
+
+// SetDirectory joins any relative file paths with dir.
+func (c *SDConfig) SetDirectory(dir string) {
+ c.HTTPClientConfig.SetDirectory(dir)
+}
+
+// Discovery periodically performs STACKIT API requests. It implements
+// the Discoverer interface.
+type Discovery struct {
+ *refresh.Discovery
+}
+
+// NewDiscovery returns a new Discovery which periodically refreshes its targets.
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
+ m, ok := metrics.(*stackitMetrics)
+ if !ok {
+ return nil, errors.New("invalid discovery metrics type")
+ }
+
+ r, err := newRefresher(conf, logger)
+ if err != nil {
+ return nil, err
+ }
+
+ return refresh.NewDiscovery(
+ refresh.Options{
+ Logger: logger,
+ Mech: "stackit",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: r.refresh,
+ MetricsInstantiator: m.refreshMetrics,
+ },
+ ), nil
+}
+
+func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) {
+ return newServerDiscovery(conf, l)
+}
diff --git a/discovery/stackit/types.go b/discovery/stackit/types.go
new file mode 100644
index 0000000000..66681c3455
--- /dev/null
+++ b/discovery/stackit/types.go
@@ -0,0 +1,38 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stackit
+
+// ServerListResponse is the response object for the server list request.
+// https://docs.api.eu01.stackit.cloud/documentation/iaas/version/v1#tag/Servers/operation/v1ListServersInProject
+type ServerListResponse struct {
+ Items *[]Server `json:"items"`
+}
+
+type Server struct {
+ AvailabilityZone string `json:"availabilityZone"`
+ ID string `json:"id"`
+ Labels map[string]interface{} `json:"labels"`
+ MachineType string `json:"machineType"`
+ Name string `json:"name"`
+ Nics []ServerNetwork `json:"nics"`
+ PowerStatus string `json:"powerStatus"`
+ Status string `json:"status"`
+}
+
+// ServerNetwork describes the object that maps a server to its networks.
+type ServerNetwork struct {
+ NetworkName string `json:"networkName"`
+ IPv4 *string `json:"ipv4,omitempty"`
+ PublicIP *string `json:"publicIp,omitempty"`
+}
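Putting the new package together, construction follows the same pattern as the other refresh-based SDs touched in this patch; a minimal sketch, assuming the listed imports and a placeholder project ID (the YAML route registered via discovery.RegisterConfig unmarshals into the same SDConfig):

	// Illustrative only. Assumed imports:
	//   "github.com/prometheus/client_golang/prometheus"
	//   "github.com/prometheus/common/promslog"
	//   "github.com/prometheus/prometheus/discovery"
	//   "github.com/prometheus/prometheus/discovery/stackit"
	cfg := stackit.DefaultSDConfig                       // Region "eu01", Port 80, 60s refresh
	cfg.Project = "00000000-0000-0000-0000-000000000000" // placeholder project ID

	reg := prometheus.NewRegistry()
	refreshMetrics := discovery.NewRefreshMetrics(reg)
	metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics)

	d, err := stackit.NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
	if err != nil {
		// handle error
	}
	_ = d // d.Run(ctx, ch) starts the periodic refresh loop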
diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go
index 675149f2a3..5efe49e23d 100644
--- a/discovery/triton/triton.go
+++ b/discovery/triton/triton.go
@@ -19,12 +19,12 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"net/http"
"net/url"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/mwitkow/go-conntrack"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -71,7 +71,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &tritonMetrics{
refreshMetrics: rmi,
}
@@ -146,10 +146,10 @@ type Discovery struct {
}
// New returns a new Discovery which periodically refreshes its targets.
-func New(logger log.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func New(logger *slog.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*tritonMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
tls, err := config.NewTLSConfig(&conf.TLSConfig)
diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go
index e37693e6bf..b0dccbf898 100644
--- a/discovery/triton/triton_test.go
+++ b/discovery/triton/triton_test.go
@@ -21,7 +21,6 @@ import (
"net/http/httptest"
"net/url"
"strconv"
- "strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
@@ -182,8 +181,7 @@ func TestTritonSDRefreshNoServer(t *testing.T) {
td, m, _ := newTritonDiscovery(conf)
_, err := td.refresh(context.Background())
- require.Error(t, err)
- require.True(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"))
+ require.ErrorContains(t, err, "an error occurred when requesting targets from the discovery endpoint")
m.Unregister()
}
@@ -193,8 +191,7 @@ func TestTritonSDRefreshCancelled(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := td.refresh(ctx)
- require.Error(t, err)
- require.True(t, strings.Contains(err.Error(), context.Canceled.Error()))
+ require.ErrorContains(t, err, context.Canceled.Error())
m.Unregister()
}
@@ -233,7 +230,7 @@ func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet {
var (
td, m, _ = newTritonDiscovery(c)
- s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintln(w, dstr)
}))
)
diff --git a/discovery/util.go b/discovery/util.go
index 83cc640dd9..4e2a088518 100644
--- a/discovery/util.go
+++ b/discovery/util.go
@@ -19,8 +19,8 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
-// A utility to be used by implementations of discovery.Discoverer
-// which need to manage the lifetime of their metrics.
+// MetricRegisterer is used by implementations of discovery.Discoverer that need
+// to manage the lifetime of their metrics.
type MetricRegisterer interface {
RegisterMetrics() error
UnregisterMetrics()
@@ -34,7 +34,7 @@ type metricRegistererImpl struct {
var _ MetricRegisterer = &metricRegistererImpl{}
-// Creates an instance of a MetricRegisterer.
+// NewMetricRegisterer creates an instance of a MetricRegisterer.
// Typically called inside the implementation of the NewDiscoverer() method.
func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer {
return &metricRegistererImpl{
diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go
index c8af2f1587..a7745eed46 100644
--- a/discovery/uyuni/uyuni.go
+++ b/discovery/uyuni/uyuni.go
@@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
+ "log/slog"
"net/http"
"net/url"
"path"
@@ -24,7 +25,6 @@ import (
"strings"
"time"
- "github.com/go-kit/log"
"github.com/kolo/xmlrpc"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@@ -41,10 +41,10 @@ const (
uyuniMetaLabelPrefix = model.MetaLabelPrefix + "uyuni_"
uyuniLabelMinionHostname = uyuniMetaLabelPrefix + "minion_hostname"
uyuniLabelPrimaryFQDN = uyuniMetaLabelPrefix + "primary_fqdn"
- uyuniLablelSystemID = uyuniMetaLabelPrefix + "system_id"
- uyuniLablelGroups = uyuniMetaLabelPrefix + "groups"
- uyuniLablelEndpointName = uyuniMetaLabelPrefix + "endpoint_name"
- uyuniLablelExporter = uyuniMetaLabelPrefix + "exporter"
+ uyuniLabelSystemID = uyuniMetaLabelPrefix + "system_id"
+ uyuniLabelGroups = uyuniMetaLabelPrefix + "groups"
+ uyuniLabelEndpointName = uyuniMetaLabelPrefix + "endpoint_name"
+ uyuniLabelExporter = uyuniMetaLabelPrefix + "exporter"
uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module"
uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path"
uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme"
@@ -109,11 +109,11 @@ type Discovery struct {
entitlement string
separator string
interval time.Duration
- logger log.Logger
+ logger *slog.Logger
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &uyuniMetrics{
refreshMetrics: rmi,
}
@@ -141,18 +141,22 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.Server == "" {
+ //nolint:staticcheck // Capitalized first word.
return errors.New("Uyuni SD configuration requires server host")
}
_, err = url.Parse(c.Server)
if err != nil {
+ //nolint:staticcheck // Capitalized first word.
return fmt.Errorf("Uyuni Server URL is not valid: %w", err)
}
if c.Username == "" {
+ //nolint:staticcheck // Capitalized first word.
return errors.New("Uyuni SD configuration requires a username")
}
if c.Password == "" {
+ //nolint:staticcheck // Capitalized first word.
return errors.New("Uyuni SD configuration requires a password")
}
return c.HTTPClientConfig.Validate()
@@ -205,17 +209,14 @@ func getEndpointInfoForSystems(
err := rpcclient.Call(
"system.monitoring.listEndpoints",
[]interface{}{token, systemIDs}, &endpointInfos)
- if err != nil {
- return nil, err
- }
return endpointInfos, err
}
// NewDiscovery returns a uyuni discovery for the given configuration.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*uyuniMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
apiURL, err := url.Parse(conf.Server)
@@ -270,10 +271,10 @@ func (d *Discovery) getEndpointLabels(
model.AddressLabel: model.LabelValue(addr),
uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname),
uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN),
- uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)),
- uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)),
- uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName),
- uyuniLablelExporter: model.LabelValue(endpoint.ExporterName),
+ uyuniLabelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)),
+ uyuniLabelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)),
+ uyuniLabelEndpointName: model.LabelValue(endpoint.EndpointName),
+ uyuniLabelExporter: model.LabelValue(endpoint.ExporterName),
uyuniLabelProxyModule: model.LabelValue(endpoint.Module),
uyuniLabelMetricsPath: model.LabelValue(endpoint.Path),
uyuniLabelScheme: model.LabelValue(scheme),
diff --git a/discovery/uyuni/uyuni_test.go b/discovery/uyuni/uyuni_test.go
index 09be23e2b4..46567587a8 100644
--- a/discovery/uyuni/uyuni_test.go
+++ b/discovery/uyuni/uyuni_test.go
@@ -21,9 +21,8 @@ import (
"testing"
"time"
- "github.com/stretchr/testify/require"
-
"github.com/prometheus/client_golang/prometheus"
+ "github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -59,7 +58,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
func TestUyuniSDHandleError(t *testing.T) {
var (
errTesting = "unable to login to Uyuni API: request error: bad status code - 500"
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, ``)
@@ -75,7 +74,7 @@ func TestUyuniSDLogin(t *testing.T) {
var (
errTesting = "unable to get the managed system groups information of monitored clients: request error: bad status code - 500"
call = 0
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Content-Type", "application/xml")
switch call {
case 0:
@@ -106,7 +105,7 @@ func TestUyuniSDLogin(t *testing.T) {
func TestUyuniSDSkipLogin(t *testing.T) {
var (
errTesting = "unable to get the managed system groups information of monitored clients: request error: bad status code - 500"
- respHandler = func(w http.ResponseWriter, r *http.Request) {
+ respHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, ``)
diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go
index aaa9c64e47..0ab477438b 100644
--- a/discovery/vultr/vultr.go
+++ b/discovery/vultr/vultr.go
@@ -15,14 +15,14 @@ package vultr
import (
"context"
- "fmt"
+ "errors"
+ "log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -75,7 +75,7 @@ type SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &vultrMetrics{
refreshMetrics: rmi,
}
@@ -114,10 +114,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*vultrMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
@@ -134,11 +134,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
Timeout: time.Duration(conf.RefreshInterval),
})
- d.client.SetUserAgent(fmt.Sprintf("Prometheus/%s", version.Version))
-
- if err != nil {
- return nil, fmt.Errorf("error setting up vultr agent: %w", err)
- }
+ d.client.SetUserAgent(version.PrometheusUserAgent())
d.Discovery = refresh.NewDiscovery(
refresh.Options{
diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go
index 2f12a35529..00ef21e38c 100644
--- a/discovery/vultr/vultr_test.go
+++ b/discovery/vultr/vultr_test.go
@@ -19,9 +19,9 @@ import (
"net/url"
"testing"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@@ -57,7 +57,7 @@ func TestVultrSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
+ d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdMock.Mock.Endpoint())
require.NoError(t, err)
diff --git a/discovery/xds/client.go b/discovery/xds/client.go
index 027ceb2715..a27e060fbd 100644
--- a/discovery/xds/client.go
+++ b/discovery/xds/client.go
@@ -30,7 +30,7 @@ import (
"github.com/prometheus/common/version"
)
-var userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
+var userAgent = version.PrometheusUserAgent()
// ResourceClient exposes the xDS protocol for a single resource type.
// See https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#rest-json-polling-subscriptions .
diff --git a/discovery/xds/client_test.go b/discovery/xds/client_test.go
index b699995fb7..bf0e53b348 100644
--- a/discovery/xds/client_test.go
+++ b/discovery/xds/client_test.go
@@ -52,16 +52,14 @@ func TestMakeXDSResourceHttpEndpointEmptyServerURLScheme(t *testing.T) {
endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("127.0.0.1"), "monitoring")
require.Empty(t, endpointURL)
- require.Error(t, err)
- require.Equal(t, "invalid xDS server URL", err.Error())
+ require.EqualError(t, err, "invalid xDS server URL")
}
func TestMakeXDSResourceHttpEndpointEmptyServerURLHost(t *testing.T) {
endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("grpc://127.0.0.1"), "monitoring")
require.Empty(t, endpointURL)
- require.Error(t, err)
- require.Contains(t, err.Error(), "must be either 'http' or 'https'")
+ require.ErrorContains(t, err, "must be either 'http' or 'https'")
}
func TestMakeXDSResourceHttpEndpoint(t *testing.T) {
@@ -108,7 +106,7 @@ func createTestHTTPResourceClient(t *testing.T, conf *HTTPResourceClientConfig,
}
func TestHTTPResourceClientFetchEmptyResponse(t *testing.T) {
- client, cleanup := createTestHTTPResourceClient(t, testHTTPResourceConfig(), ProtocolV3, func(request *v3.DiscoveryRequest) (*v3.DiscoveryResponse, error) {
+ client, cleanup := createTestHTTPResourceClient(t, testHTTPResourceConfig(), ProtocolV3, func(_ *v3.DiscoveryRequest) (*v3.DiscoveryResponse, error) {
return nil, nil
})
defer cleanup()
@@ -148,7 +146,7 @@ func TestHTTPResourceClientFetchFullResponse(t *testing.T) {
}
func TestHTTPResourceClientServerError(t *testing.T) {
- client, cleanup := createTestHTTPResourceClient(t, testHTTPResourceConfig(), ProtocolV3, func(request *v3.DiscoveryRequest) (*v3.DiscoveryResponse, error) {
+ client, cleanup := createTestHTTPResourceClient(t, testHTTPResourceConfig(), ProtocolV3, func(_ *v3.DiscoveryRequest) (*v3.DiscoveryResponse, error) {
return nil, errors.New("server error")
})
defer cleanup()
diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go
index d1d540aaf4..6208e6182a 100644
--- a/discovery/xds/kuma.go
+++ b/discovery/xds/kuma.go
@@ -14,15 +14,16 @@
package xds
import (
+ "errors"
"fmt"
+ "log/slog"
"net/url"
"time"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"google.golang.org/protobuf/types/known/anypb"
"github.com/prometheus/prometheus/discovery"
@@ -99,7 +100,7 @@ func (c *KumaSDConfig) SetDirectory(dir string) {
func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
logger := opts.Logger
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
return NewKumaHTTPDiscovery(c, logger, opts.Metrics)
@@ -158,10 +159,10 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L
return targets, nil
}
-func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) {
+func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) {
m, ok := metrics.(*xdsMetrics)
if !ok {
- return nil, fmt.Errorf("invalid discovery metrics type")
+ return nil, errors.New("invalid discovery metrics type")
}
// Default to "prometheus" if hostname is unavailable.
@@ -170,7 +171,7 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discove
var err error
clientID, err = osutil.GetFQDN()
if err != nil {
- level.Debug(logger).Log("msg", "error getting FQDN", "err", err)
+ logger.Debug("error getting FQDN", "err", err)
clientID = "prometheus"
}
}
diff --git a/discovery/xds/kuma_mads.pb.go b/discovery/xds/kuma_mads.pb.go
index b1079bf23f..210a5343a4 100644
--- a/discovery/xds/kuma_mads.pb.go
+++ b/discovery/xds/kuma_mads.pb.go
@@ -23,13 +23,14 @@ package xds
import (
context "context"
+ reflect "reflect"
+ sync "sync"
+
v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
_ "github.com/envoyproxy/protoc-gen-validate/validate"
_ "google.golang.org/genproto/googleapis/api/annotations"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
- reflect "reflect"
- sync "sync"
)
const (
diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go
index cfb9cbac50..23d754c4b7 100644
--- a/discovery/xds/kuma_test.go
+++ b/discovery/xds/kuma_test.go
@@ -201,9 +201,8 @@ func TestKumaMadsV1ResourceParserInvalidResources(t *testing.T) {
}}
groups, err := kumaMadsV1ResourceParser(resources, KumaMadsV1ResourceTypeURL)
require.Nil(t, groups)
- require.Error(t, err)
- require.Contains(t, err.Error(), "cannot parse")
+ require.ErrorContains(t, err, "cannot parse")
}
func TestNewKumaHTTPDiscovery(t *testing.T) {
diff --git a/discovery/xds/metrics.go b/discovery/xds/metrics.go
index 597d516566..bdc9598f2c 100644
--- a/discovery/xds/metrics.go
+++ b/discovery/xds/metrics.go
@@ -29,7 +29,7 @@ type xdsMetrics struct {
metricRegisterer discovery.MetricRegisterer
}
-func newDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
m := &xdsMetrics{
fetchFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go
index 8191d6be1a..db55a2b6f7 100644
--- a/discovery/xds/xds.go
+++ b/discovery/xds/xds.go
@@ -15,11 +15,10 @@ package xds
import (
"context"
+ "log/slog"
"time"
v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"google.golang.org/protobuf/encoding/protojson"
@@ -104,7 +103,7 @@ type fetchDiscovery struct {
refreshInterval time.Duration
parseResources resourceParser
- logger log.Logger
+ logger *slog.Logger
metrics *xdsMetrics
}
@@ -140,7 +139,7 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou
}
if err != nil {
- level.Error(d.logger).Log("msg", "error parsing resources", "err", err)
+ d.logger.Error("error parsing resources", "err", err)
d.metrics.fetchFailuresCount.Inc()
return
}
@@ -153,12 +152,12 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou
parsedTargets, err := d.parseResources(response.Resources, response.TypeUrl)
if err != nil {
- level.Error(d.logger).Log("msg", "error parsing resources", "err", err)
+ d.logger.Error("error parsing resources", "err", err)
d.metrics.fetchFailuresCount.Inc()
return
}
- level.Debug(d.logger).Log("msg", "Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets))
+ d.logger.Debug("Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets))
select {
case <-ctx.Done():
diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go
index 7cce021c5f..af2784bcb2 100644
--- a/discovery/xds/xds_test.go
+++ b/discovery/xds/xds_test.go
@@ -22,9 +22,9 @@ import (
"time"
v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
- "github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"google.golang.org/protobuf/types/known/anypb"
@@ -85,12 +85,12 @@ func createTestHTTPServer(t *testing.T, responder discoveryResponder) *httptest.
}
func constantResourceParser(targets []model.LabelSet, err error) resourceParser {
- return func(resources []*anypb.Any, typeUrl string) ([]model.LabelSet, error) {
+ return func(_ []*anypb.Any, _ string) ([]model.LabelSet, error) {
return targets, err
}
}
-var nopLogger = log.NewNopLogger()
+var nopLogger = promslog.NewNopLogger()
type testResourceClient struct {
resourceTypeURL string
@@ -120,7 +120,7 @@ func (rc testResourceClient) Close() {
func TestPollingRefreshSkipUpdate(t *testing.T) {
rc := &testResourceClient{
- fetch: func(ctx context.Context) (*v3.DiscoveryResponse, error) {
+ fetch: func(_ context.Context) (*v3.DiscoveryResponse, error) {
return nil, nil
},
}
@@ -167,7 +167,7 @@ func TestPollingRefreshAttachesGroupMetadata(t *testing.T) {
rc := &testResourceClient{
server: server,
protocolVersion: ProtocolV3,
- fetch: func(ctx context.Context) (*v3.DiscoveryResponse, error) {
+ fetch: func(_ context.Context) (*v3.DiscoveryResponse, error) {
return &v3.DiscoveryResponse{}, nil
},
}
@@ -223,14 +223,14 @@ func TestPollingDisappearingTargets(t *testing.T) {
rc := &testResourceClient{
server: server,
protocolVersion: ProtocolV3,
- fetch: func(ctx context.Context) (*v3.DiscoveryResponse, error) {
+ fetch: func(_ context.Context) (*v3.DiscoveryResponse, error) {
return &v3.DiscoveryResponse{}, nil
},
}
// On the first poll, send back two targets. On the next, send just one.
counter := 0
- parser := func(resources []*anypb.Any, typeUrl string) ([]model.LabelSet, error) {
+ parser := func(_ []*anypb.Any, _ string) ([]model.LabelSet, error) {
counter++
if counter == 1 {
return []model.LabelSet{
diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go
index 92904dd71c..af26cc5a0e 100644
--- a/discovery/zookeeper/zookeeper.go
+++ b/discovery/zookeeper/zookeeper.go
@@ -18,15 +18,16 @@ import (
"encoding/json"
"errors"
"fmt"
+ "log/slog"
"net"
"strconv"
"strings"
"time"
- "github.com/go-kit/log"
"github.com/go-zookeeper/zk"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
+ "github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -58,7 +59,7 @@ type ServersetSDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*ServersetSDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*ServersetSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &discovery.NoopDiscovererMetrics{}
}
@@ -100,7 +101,7 @@ type NerveSDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
-func (*NerveSDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
+func (*NerveSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &discovery.NoopDiscovererMetrics{}
}
@@ -146,16 +147,16 @@ type Discovery struct {
treeCaches []*treecache.ZookeeperTreeCache
parse func(data []byte, path string) (model.LabelSet, error)
- logger log.Logger
+ logger *slog.Logger
}
// NewNerveDiscovery returns a new Discovery for the given Nerve config.
-func NewNerveDiscovery(conf *NerveSDConfig, logger log.Logger) (*Discovery, error) {
+func NewNerveDiscovery(conf *NerveSDConfig, logger *slog.Logger) (*Discovery, error) {
return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseNerveMember)
}
// NewServersetDiscovery returns a new Discovery for the given serverset config.
-func NewServersetDiscovery(conf *ServersetSDConfig, logger log.Logger) (*Discovery, error) {
+func NewServersetDiscovery(conf *ServersetSDConfig, logger *slog.Logger) (*Discovery, error) {
return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseServersetMember)
}
@@ -165,11 +166,11 @@ func NewDiscovery(
srvs []string,
timeout time.Duration,
paths []string,
- logger log.Logger,
+ logger *slog.Logger,
pf func(data []byte, path string) (model.LabelSet, error),
) (*Discovery, error) {
if logger == nil {
- logger = log.NewNopLogger()
+ logger = promslog.NewNopLogger()
}
conn, _, err := zk.Connect(
diff --git a/discovery/zookeeper/zookeeper_test.go b/discovery/zookeeper/zookeeper_test.go
index c2b41ce7a3..e496dfef51 100644
--- a/discovery/zookeeper/zookeeper_test.go
+++ b/discovery/zookeeper/zookeeper_test.go
@@ -26,11 +26,13 @@ func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
+// TestNewDiscoveryError can fail if the DNS resolver mistakenly resolves the domain below.
+// See https://github.com/prometheus/prometheus/issues/16191 for a precedent.
func TestNewDiscoveryError(t *testing.T) {
_, err := NewDiscovery(
- []string{"unreachable.test"},
+ []string{"unreachable.invalid"},
time.Second, []string{"/"},
nil,
- func(data []byte, path string) (model.LabelSet, error) { return nil, nil })
+ func(_ []byte, _ string) (model.LabelSet, error) { return nil, nil })
require.Error(t, err)
}
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index 2faf65105e..e90a7574ba 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -2,12 +2,9 @@
title: prometheus
---
-# prometheus
-
The Prometheus monitoring server
-
## Flags
| Flag | Description | Default |
@@ -15,11 +12,15 @@ The Prometheus monitoring server
| -h, --help | Show context-sensitive help (also try --help-long and --help-man). | |
| --version | Show application version. | |
| --config.file | Prometheus configuration file path. | `prometheus.yml` |
-| --web.listen-address | Address to listen on for UI, API, and telemetry. | `0.0.0.0:9090` |
+| --config.auto-reload-interval | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` |
+| --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` |
+| --auto-gomaxprocs | Automatically set GOMAXPROCS to match Linux container CPU quota | `true` |
+| --auto-gomemlimit | Automatically set GOMEMLIMIT to match Linux container or system memory limit | `true` |
| --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` |
| --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | |
| --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` |
-| --web.max-connections | Maximum number of simultaneous connections. | `512` |
+| --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` |
+| --web.max-notifications-subscribers | Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close. | `16` |
| --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | |
| --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | |
| --web.user-assets | Path to static asset directory, available at /user. | |
@@ -27,18 +28,18 @@ The Prometheus monitoring server
| --web.enable-admin-api | Enable API endpoints for admin control actions. | `false` |
| --web.enable-remote-write-receiver | Enable API endpoint accepting remote write requests. | `false` |
| --web.remote-write-receiver.accepted-protobuf-messages | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` |
+| --web.enable-otlp-receiver | Enable API endpoint accepting OTLP write requests. | `false` |
| --web.console.templates | Path to the console template directory, available at /consoles. | `consoles` |
| --web.console.libraries | Path to the console library directory. | `console_libraries` |
| --web.page-title | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` |
| --web.cors.origin | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` |
| --storage.tsdb.path | Base path for metrics storage. Use with server mode only. | `data/` |
-| --storage.tsdb.retention | [DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use "storage.tsdb.retention.time" instead. Use with server mode only. | |
-| --storage.tsdb.retention.time | How long to retain samples in storage. When this flag is set it overrides "storage.tsdb.retention". If neither this flag nor "storage.tsdb.retention" nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
+| --storage.tsdb.retention.time | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
| --storage.tsdb.retention.size | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | |
| --storage.tsdb.no-lockfile | Do not create lockfile in data directory. Use with server mode only. | `false` |
| --storage.tsdb.head-chunks-write-queue-size | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` |
| --storage.agent.path | Base path for metrics storage. Use with agent mode only. | `data-agent/` |
-| --storage.agent.wal-compression | Compress the agent WAL. Use with agent mode only. | `true` |
+| --storage.agent.wal-compression | Compress the agent WAL. If false, the --storage.agent.wal-compression-type flag is ignored. Use with agent mode only. | `true` |
| --storage.agent.retention.min-time | Minimum age samples may be before being considered for deletion when the WAL is truncated Use with agent mode only. | |
| --storage.agent.retention.max-time | Maximum age samples may be before being forcibly deleted when the WAL is truncated Use with agent mode only. | |
| --storage.agent.no-lockfile | Do not create lockfile in data directory. Use with agent mode only. | `false` |
@@ -51,12 +52,14 @@ The Prometheus monitoring server
| --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` |
| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` |
| --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` |
+| --alertmanager.notification-batch-size | The maximum number of notifications per batch to send to the Alertmanager. Use with server mode only. | `256` |
| --alertmanager.drain-notification-queue-on-shutdown | Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down. Use with server mode only. | `true` |
| --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` |
| --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
-| --enable-feature | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
+| --enable-feature ... | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
+| --agent | Run Prometheus in 'Agent mode'. | |
| --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` |
diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md
index 443cd3f0cb..92e0ac0030 100644
--- a/docs/command-line/promtool.md
+++ b/docs/command-line/promtool.md
@@ -2,12 +2,9 @@
title: promtool
---
-# promtool
-
Tooling for the Prometheus monitoring system.
-
## Flags
| Flag | Description |
@@ -15,7 +12,7 @@ Tooling for the Prometheus monitoring system.
| -h, --help | Show context-sensitive help (also try --help-long and --help-man). |
| --version | Show application version. |
| --experimental | Enable experimental commands. |
-| --enable-feature | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. |
+| --enable-feature ... | Comma separated feature names to enable. Valid options: promql-experimental-functions, promql-delayed-name-removal. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. |
@@ -59,9 +56,10 @@ Check the resources for validity.
#### Flags
-| Flag | Description |
-| --- | --- |
-| --extended | Print extended information related to the cardinality of the metrics. |
+| Flag | Description | Default |
+| --- | --- | --- |
+| --query.lookback-delta | The server's maximum query lookback duration. | `5m` |
+| --extended | Print extended information related to the cardinality of the metrics. | |
@@ -102,8 +100,9 @@ Check if the config files are valid or not.
| Flag | Description | Default |
| --- | --- | --- |
| --syntax-only | Only check the config file syntax, ignoring file and content validation referenced in the config | |
-| --lint | Linting checks to apply to the rules specified in the config. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` |
+| --lint | Linting checks to apply to the rules/scrape configs specified in the config. Available options are: all, duplicate-rules, none, too-long-scrape-interval. Use --lint=none to disable linting | `duplicate-rules` |
| --lint-fatal | Make lint errors exit with exit code 3. | `false` |
+| --ignore-unknown-fields | Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` |
| --agent | Check config file for Prometheus in Agent mode. | |
@@ -177,6 +176,7 @@ Check if the rule files are valid or not.
| --- | --- | --- |
| --lint | Linting checks to apply. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` |
| --lint-fatal | Make lint errors exit with exit code 3. | `false` |
+| --ignore-unknown-fields | Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` |
@@ -281,7 +281,7 @@ Run series query.
| Flag | Description |
| --- | --- |
-| --match | Series selector. Can be specified multiple times. |
+| --match ... | Series selector. Can be specified multiple times. |
| --start | Start time (RFC3339 or Unix timestamp). |
| --end | End time (RFC3339 or Unix timestamp). |
@@ -309,7 +309,7 @@ Run labels query.
| --- | --- |
| --start | Start time (RFC3339 or Unix timestamp). |
| --end | End time (RFC3339 or Unix timestamp). |
-| --match | Series selector. Can be specified multiple times. |
+| --match ... | Series selector. Can be specified multiple times. |
@@ -338,7 +338,7 @@ Run queries against your Prometheus to analyze the usage pattern of certain metr
| --type | Type of metric: histogram. | |
| --duration | Time frame to analyze. | `1h` |
| --time | Query time (RFC3339 or Unix timestamp), defaults to now. | |
-| --match | Series selector. Can be specified multiple times. | |
+| --match ... | Series selector. Can be specified multiple times. | |
@@ -442,6 +442,15 @@ Unit testing.
+#### Flags
+
+| Flag | Description |
+| --- | --- |
+| --junit | File path to store JUnit XML test results. |
+
+
+
+
##### `promtool test rules`
Unit tests for rules.
@@ -452,8 +461,10 @@ Unit tests for rules.
| Flag | Description | Default |
| --- | --- | --- |
-| --run | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | |
+| --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | |
+| --debug | Enable unit test debugging. | `false` |
| --diff | [Experimental] Print colored differential output between expected & received output. | `false` |
+| --ignore-unknown-fields | Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` |
@@ -566,10 +577,10 @@ Dump samples from a TSDB.
| Flag | Description | Default |
| --- | --- | --- |
-| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
-| --min-time | Minimum timestamp to dump. | `-9223372036854775808` |
-| --max-time | Maximum timestamp to dump. | `9223372036854775807` |
-| --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
+| --sandbox-dir-root | Root directory where a sandbox directory will be created; this sandbox is used in case WAL replay generates chunks (the default is the database path). The sandbox is cleaned up at the end. | |
+| --min-time | Minimum timestamp to dump, in milliseconds since the Unix epoch. | `-9223372036854775808` |
+| --max-time | Maximum timestamp to dump, in milliseconds since the Unix epoch. | `9223372036854775807` |
+| --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
@@ -593,10 +604,10 @@ Dump samples from a TSDB.
| Flag | Description | Default |
| --- | --- | --- |
-| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
-| --min-time | Minimum timestamp to dump. | `-9223372036854775808` |
-| --max-time | Maximum timestamp to dump. | `9223372036854775807` |
-| --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
+| --sandbox-dir-root | Root directory where a sandbox directory will be created; this sandbox is used in case WAL replay generates chunks (the default is the database path). The sandbox is cleaned up at the end. | |
+| --min-time | Minimum timestamp to dump, in milliseconds since the Unix epoch. | `-9223372036854775808` |
+| --max-time | Maximum timestamp to dump, in milliseconds since the Unix epoch. | `9223372036854775807` |
+| --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
@@ -632,6 +643,15 @@ Import samples from OpenMetrics input and produce TSDB blocks. Please refer to t
+###### Flags
+
+| Flag | Description |
+| --- | --- |
+| --label | Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value |
+
+
+
+
###### Arguments
| Argument | Description | Default | Required |
diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md
index 3c1ec84f0f..faffad56f2 100644
--- a/docs/configuration/alerting_rules.md
+++ b/docs/configuration/alerting_rules.md
@@ -3,15 +3,13 @@ title: Alerting rules
sort_rank: 3
---
-# Alerting rules
-
Alerting rules allow you to define alert conditions based on Prometheus
expression language expressions and to send notifications about firing alerts
to an external service. Whenever the alert expression results in one or more
vector elements at a given point in time, the alert counts as active for these
elements' label sets.
-### Defining alerting rules
+## Defining alerting rules
Alerting rules are configured in Prometheus in the same way as [recording
rules](recording_rules.md).
@@ -21,10 +19,13 @@ An example rules file with an alert would be:
```yaml
groups:
- name: example
+ labels:
+ team: myteam
rules:
- alert: HighRequestLatency
expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
for: 10m
+ keep_firing_for: 5m
labels:
severity: page
annotations:
@@ -38,13 +39,20 @@ the alert continues to be active during each evaluation for 10 minutes before
firing the alert. Elements that are active, but not firing yet, are in the pending state.
Alerting rules without the `for` clause will become active on the first evaluation.
+There is also an optional `keep_firing_for` clause that tells Prometheus to keep
+this alert firing for the specified duration after the firing condition was last met.
+This can be used to prevent situations such as flapping alerts or false
+resolutions caused by temporary gaps in data. Alerting rules without the `keep_firing_for` clause
+will deactivate on the first evaluation where the condition is not met (assuming
+any optional `for` duration described above has been satisfied).
+
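As a sketch of how the two durations interact (the metric name and threshold below are hypothetical): the rule becomes pending when the expression first returns results, fires once the condition has held for the `for` duration, and then remains firing for the `keep_firing_for` duration after the condition last held.

```yaml
groups:
  - name: example-keep-firing            # hypothetical group name
    rules:
      - alert: HighErrorRate             # hypothetical alert
        expr: job:request_errors:rate5m{job="myjob"} > 0.05
        for: 10m             # condition must hold this long before the alert fires
        keep_firing_for: 5m  # alert keeps firing this long after the condition clears
```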
The `labels` clause allows specifying a set of additional labels to be attached
to the alert. Any existing conflicting labels will be overwritten. The label
values can be templated.
The `annotations` clause specifies a set of informational labels that can be used to store longer additional information such as alert descriptions or runbook links. The annotation values can be templated.
-#### Templating
+### Templating
Label and annotation values can be templated using [console
templates](https://prometheus.io/docs/visualization/consoles). The `$labels`
@@ -83,7 +91,7 @@ groups:
description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"
```
-### Inspecting alerts during runtime
+## Inspecting alerts during runtime
To manually inspect which alerts are active (pending or firing), navigate to
the "Alerts" tab of your Prometheus instance. This will show you the exact
@@ -95,7 +103,7 @@ The sample value is set to `1` as long as the alert is in the indicated active
(pending or firing) state, and the series is marked stale when this is no
longer the case.
-### Sending alert notifications
+## Sending alert notifications
Prometheus's alerting rules are good at figuring what is broken *right now*, but
they are not a fully-fledged notification solution. Another layer is needed to
@@ -104,6 +112,6 @@ on top of the simple alert definitions. In Prometheus's ecosystem, the
[Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) takes on this
role. Thus, Prometheus may be configured to periodically send information about
alert states to an Alertmanager instance, which then takes care of dispatching
-the right notifications.
+the right notifications.
Prometheus can be [configured](configuration.md) to automatically discover available
Alertmanager instances through its service discovery integrations.
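For reference, a minimal statically configured Alertmanager target in the Prometheus configuration could look like the following; the host and port are placeholders.

```yaml
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager.example.org:9093"]
```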
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index 35976871b9..45f099af4e 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -3,8 +3,6 @@ title: Configuration
sort_rank: 1
---
-# Configuration
-
Prometheus is configured via command-line flags and a configuration file. While
the command-line flags configure immutable system parameters (such as storage
locations, amount of data to keep on disk and in memory, etc.), the
@@ -59,6 +57,7 @@ global:
[ scrape_interval: | default = 1m ]
# How long until a scrape request times out.
+ # It cannot be greater than the scrape interval.
[ scrape_timeout: | default = 10s ]
# The protocols to negotiate during a scrape with the client.
@@ -70,13 +69,20 @@ global:
# How frequently to evaluate rules.
[ evaluation_interval: | default = 1m ]
-
- # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received.
- # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping.
+
+ # Offset the rule evaluation timestamp of this particular group by the
+ # specified duration into the past to ensure the underlying metrics have
+ # been received. Metric availability delays are more likely to occur when
+ # Prometheus is running as a remote write target, but can also occur when
+ # there's anomalies with scraping.
[ rule_query_offset: | default = 0s ]
# The labels to add to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
+ # Environment variable references `${var}` or `$var` are replaced according
+ # to the values of the current environment variables.
+ # References to undefined variables are replaced by the empty string.
+ # The `$` character can be escaped by using `$$`.
external_labels:
[ : ... ]
@@ -84,33 +90,39 @@ global:
# Reloading the configuration will reopen the file.
[ query_log_file: ]
+ # File to which scrape failures are logged.
+ # Reloading the configuration will reopen the file.
+ [ scrape_failure_log_file: ]
+
# An uncompressed response body larger than this many bytes will cause the
# scrape to fail. 0 means no limit. Example: 100MB.
# This is an experimental feature, this behaviour could
# change or be removed in the future.
[ body_size_limit: | default = 0 ]
- # Per-scrape limit on number of scraped samples that will be accepted.
+ # Per-scrape limit on the number of scraped samples that will be accepted.
# If more than this number of samples are present after metric relabeling
# the entire scrape will be treated as failed. 0 means no limit.
[ sample_limit: | default = 0 ]
- # Per-scrape limit on number of labels that will be accepted for a sample. If
- # more than this number of labels are present post metric-relabeling, the
- # entire scrape will be treated as failed. 0 means no limit.
+ # Limit on the number of labels that will be accepted per sample. If more
+ # than this number of labels are present on any sample post metric-relabeling,
+ # the entire scrape will be treated as failed. 0 means no limit.
[ label_limit: | default = 0 ]
- # Per-scrape limit on length of labels name that will be accepted for a sample.
- # If a label name is longer than this number post metric-relabeling, the entire
- # scrape will be treated as failed. 0 means no limit.
+ # Limit on the length (in bytes) of each individual label name. If any label
+ # name in a scrape is longer than this number post metric-relabeling, the
+ # entire scrape will be treated as failed. Note that label names are UTF-8
+ # encoded, and characters can take up to 4 bytes. 0 means no limit.
[ label_name_length_limit: | default = 0 ]
- # Per-scrape limit on length of labels value that will be accepted for a sample.
- # If a label value is longer than this number post metric-relabeling, the
- # entire scrape will be treated as failed. 0 means no limit.
+ # Limit on the length (in bytes) of each individual label value. If any label
+ # value in a scrape is longer than this number post metric-relabeling, the
+ # entire scrape will be treated as failed. Note that label values are UTF-8
+ # encoded, and characters can take up to 4 bytes. 0 means no limit.
[ label_value_length_limit: | default = 0 ]
- # Per-scrape config limit on number of unique targets that will be
+ # Limit per scrape config on number of unique targets that will be
# accepted. If more than this number of targets are present after target
# relabeling, Prometheus will mark the targets as failed without scraping them.
# 0 means no limit. This is an experimental feature, this behaviour could
@@ -121,6 +133,20 @@ global:
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: | default = 0 ]
+ # Specifies the validation scheme for metric and label names. Either blank or
+ # "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons,
+ # and underscores.
+ [ metric_name_validation_scheme: | default = "utf8" ]
+
+ # Specifies whether to convert all scraped classic histograms into native
+ # histograms with custom buckets.
+ [ convert_classic_histograms_to_nhcb: | default = false ]
+
+ # Specifies whether to scrape a classic histogram, even if it is also exposed as a native
+ # histogram (has no effect without --enable-feature=native-histograms).
+ [ always_scrape_classic_histograms: | default = false ]
+
+
runtime:
# Configure the Go garbage collector GOGC parameter
# See: https://tip.golang.org/doc/gc-guide#GOGC
@@ -152,6 +178,46 @@ alerting:
remote_write:
[ - ... ]
+# Settings related to the OTLP receiver feature.
+# See https://prometheus.io/docs/guides/opentelemetry/ for best practices.
+otlp:
+ # Promote a specific list of resource attributes to labels.
+ # It cannot be configured simultaneously with 'promote_all_resource_attributes: true'.
+ [ promote_resource_attributes: [, ...] | default = [ ] ]
+ # Promote all resource attributes to labels, except for the ones configured with 'ignore_resource_attributes'.
+ # Be aware that changes in attributes received by the OTLP endpoint may result in time series churn and lead to high memory usage by the Prometheus server.
+ # It cannot be set to 'true' simultaneously with 'promote_resource_attributes'.
+ [ promote_all_resource_attributes: | default = false ]
+ # Which resource attributes to ignore, can only be set when 'promote_all_resource_attributes' is true.
+ [ ignore_resource_attributes: [, ...] | default = [] ]
+ # Configures translation of OTLP metrics when received through the OTLP metrics
+ # endpoint. Available values:
+ # - "UnderscoreEscapingWithSuffixes" refers to commonly agreed normalization used
+ # by OpenTelemetry in https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus
+ # - "NoUTF8EscapingWithSuffixes" is a mode that relies on UTF-8 support in Prometheus.
+ # It preserves all special characters like dots, but still adds required metric name suffixes
+ # for units and _total, as UnderscoreEscapingWithSuffixes does.
+ # - (EXPERIMENTAL) "NoTranslation" is a mode that relies on UTF-8 support in Prometheus.
+ # It preserves all special character like dots and won't append special suffixes for metric
+ # unit and type.
+ #
+ # WARNING: The "NoTranslation" setting has significant known risks and limitations (see https://prometheus.io/docs/practices/naming/
+ # for details):
+ # * Impaired UX when using PromQL in plain YAML (e.g. alerts, rules, dashboard, autoscaling configuration).
+ # * Series collisions which in the best case may result in OOO errors, in the worst case a silently malformed
+ # time series. For instance, you may end up in a situation of ingesting a `foo.bar` series with unit
+ # `seconds` and a separate series `foo.bar` with unit `milliseconds`.
+ [ translation_strategy: | default = "UnderscoreEscapingWithSuffixes" ]
+ # Enables adding "service.name", "service.namespace" and "service.instance.id"
+ # resource attributes to the "target_info" metric, on top of converting
+ # them into the "instance" and "job" labels.
+ [ keep_identifying_resource_attributes: | default = false ]
+ # Configures optional translation of OTLP explicit bucket histograms into native histograms with custom buckets.
+ [ convert_histograms_to_nhcb: | default = false ]
+ # Enables promotion of OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
+ # This is disabled by default for backwards compatibility, but according to OTel spec, scope metadata _should_ be identifying, i.e. translated to metric labels.
+ [ promote_scope_metadata: | default = false ]
+
# Settings related to the remote read feature.
remote_read:
[ - ... ]
@@ -186,16 +252,24 @@ job_name:
[ scrape_interval: | default = ]
# Per-scrape timeout when scraping this job.
+# It cannot be greater than the scrape interval.
[ scrape_timeout: | default = ]
# The protocols to negotiate during a scrape with the client.
# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
-# OpenMetricsText1.0.0, PrometheusText0.0.4.
+# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0.
[ scrape_protocols: [, ...] | default = ]
-# Whether to scrape a classic histogram that is also exposed as a native
+# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise
+# invalid Content-Type.
+# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
+# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0.
+[ fallback_scrape_protocol: ]
+
+# Whether to scrape a classic histogram, even if it is also exposed as a native
# histogram (has no effect without --enable-feature=native-histograms).
-[ scrape_classic_histograms: | default = false ]
+[ always_scrape_classic_histograms: | default = ]
# The HTTP resource path on which to fetch metrics from targets.
[ metrics_path: | default = /metrics ]
@@ -251,53 +325,13 @@ params:
# response from the scraped target.
[ enable_compression: | default = true ]
-# Sets the `Authorization` header on every scrape request with the
-# configured username and password.
-# password and password_file are mutually exclusive.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Sets the `Authorization` header on every scrape request with
-# the configured credentials.
-authorization:
- # Sets the authentication type of the request.
- [ type: | default: Bearer ]
- # Sets the credentials of the request. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials of the request with the credentials read from the
- # configured file. It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
- [ ]
-
-# Configure whether scrape requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
-# Configures the scrape request's TLS settings.
-tls_config:
- [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
+# File to which scrape failures are logged.
+# Reloading the configuration will reopen the file.
+[ scrape_failure_log_file: ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ ]
# List of Azure service discovery configurations.
azure_sd_configs:
@@ -400,6 +434,10 @@ scaleway_sd_configs:
serverset_sd_configs:
[ - ... ]
+# List of STACKIT service discovery configurations.
+stackit_sd_configs:
+ [ - ... ]
+
# List of Triton service discovery configurations.
triton_sd_configs:
[ - ... ]
@@ -426,48 +464,73 @@ metric_relabel_configs:
# change or be removed in the future.
[ body_size_limit: | default = 0 ]
-# Per-scrape limit on number of scraped samples that will be accepted.
+# Per-scrape limit on the number of scraped samples that will be accepted.
# If more than this number of samples are present after metric relabeling
# the entire scrape will be treated as failed. 0 means no limit.
[ sample_limit: | default = 0 ]
-# Per-scrape limit on number of labels that will be accepted for a sample. If
-# more than this number of labels are present post metric-relabeling, the
-# entire scrape will be treated as failed. 0 means no limit.
+# Limit on the number of labels that will be accepted per sample. If more
+# than this number of labels are present on any sample post metric-relabeling,
+# the entire scrape will be treated as failed. 0 means no limit.
[ label_limit: | default = 0 ]
-# Per-scrape limit on length of labels name that will be accepted for a sample.
-# If a label name is longer than this number post metric-relabeling, the entire
-# scrape will be treated as failed. 0 means no limit.
+# Limit on the length (in bytes) of each individual label name. If any label
+# name in a scrape is longer than this number post metric-relabeling, the
+# entire scrape will be treated as failed. Note that label names are UTF-8
+# encoded, and characters can take up to 4 bytes. 0 means no limit.
[ label_name_length_limit: | default = 0 ]
-# Per-scrape limit on length of labels value that will be accepted for a sample.
-# If a label value is longer than this number post metric-relabeling, the
-# entire scrape will be treated as failed. 0 means no limit.
+# Limit on the length (in bytes) of each individual label value. If any label
+# value in a scrape is longer than this number post metric-relabeling, the
+# entire scrape will be treated as failed. Note that label values are UTF-8
+# encoded, and characters can take up to 4 bytes. 0 means no limit.
[ label_value_length_limit: | default = 0 ]
-# Per-scrape config limit on number of unique targets that will be
+# Limit per scrape config on number of unique targets that will be
# accepted. If more than this number of targets are present after target
# relabeling, Prometheus will mark the targets as failed without scraping them.
# 0 means no limit. This is an experimental feature, this behaviour could
# change in the future.
[ target_limit: | default = 0 ]
-# Per-job limit on the number of targets dropped by relabeling
+# Limit per scrape config on the number of targets dropped by relabeling
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: | default = 0 ]
+# Specifies the validation scheme for metric and label names. Either blank or
+# "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, and
+# underscores.
+[ metric_name_validation_scheme: | default = "utf8" ]
+
+# Specifies the character escaping scheme that will be requested when scraping
+# for metric and label names that do not conform to the legacy Prometheus
+# character set. Available options are:
+# * `allow-utf-8`: Full UTF-8 support, no escaping needed.
+# * `underscores`: Escape all legacy-invalid characters to underscores.
+# * `dots`: Escapes dots to `_dot_`, underscores to `__`, and all other
+# legacy-invalid characters to underscores.
+# * `values`: Prepend the name with `U__` and replace all invalid
+# characters with their unicode value, surrounded by underscores. Single
+# underscores are replaced with double underscores.
+# e.g. "U__my_2e_dotted_2e_name".
+# If this value is left blank, Prometheus will default to `allow-utf-8` if the
+# validation scheme for the current scrape config is set to utf8, or
+# `underscores` if the validation scheme is set to `legacy`.
+[ metric_name_escaping_scheme: | default = "allow-utf-8" ]
+
# Limit on total number of positive and negative buckets allowed in a single
-# native histogram. If this is exceeded, the entire scrape will be treated as
-# failed. 0 means no limit.
+# native histogram. The resolution of a histogram with more buckets will be
+# reduced until the number of buckets is within the limit. If the limit cannot
+# be reached, the scrape will fail.
+# 0 means no limit.
[ native_histogram_bucket_limit: | default = 0 ]
# Lower limit for the growth factor of one bucket to the next in each native
# histogram. The resolution of a histogram with a lower growth factor will be
-# reduced until it is within the limit.
+# reduced as much as possible until it is within the limit.
# To set an upper limit for the schema (equivalent to "scale" in OTel's
# exponential histograms), use the following factor limits:
-#
+#
# +----------------------------+----------------------------+
# | growth factor | resulting schema AKA scale |
# +----------------------------+----------------------------+
@@ -497,14 +560,86 @@ metric_relabel_configs:
# +----------------------------+----------------------------+
# | 1.002 | 8 |
# +----------------------------+----------------------------+
-#
+#
# 0 results in the smallest supported factor (which is currently ~1.0027 or
# schema 8, but might change in the future).
[ native_histogram_min_bucket_factor: | default = 0 ]
+
+# Specifies whether to convert classic histograms into native histograms with
+# custom buckets (has no effect without --enable-feature=native-histograms).
+[ convert_classic_histograms_to_nhcb: | default = ]
```
Where `` must be unique across all scrape configurations.
+### ``
+
+An `http_config` allows configuring HTTP requests.
+
+```yaml
+# Sets the `Authorization` header on every request with the
+# configured username and password.
+# username and username_file are mutually exclusive.
+# password and password_file are mutually exclusive.
+basic_auth:
+ [ username: ]
+ [ username_file: ]
+ [ password: ]
+ [ password_file: ]
+
+# Sets the `Authorization` header on every request with
+# the configured credentials.
+authorization:
+ # Sets the authentication type of the request.
+ [ type: | default: Bearer ]
+ # Sets the credentials of the request. It is mutually exclusive with
+ # `credentials_file`.
+ [ credentials: ]
+ # Sets the credentials of the request with the credentials read from the
+ # configured file. It is mutually exclusive with `credentials`.
+ [ credentials_file: ]
+
+# Optional OAuth 2.0 configuration.
+# Cannot be used at the same time as basic_auth or authorization.
+oauth2:
+ [ ]
+
+# Configure whether requests follow HTTP 3xx redirects.
+[ follow_redirects: | default = true ]
+
+# Whether to enable HTTP2.
+[ enable_http2: | default: true ]
+
+# Configures the request's TLS settings.
+tls_config:
+ [ ]
+
+# Optional proxy URL.
+[ proxy_url: ]
+# Comma-separated string that can contain IPs, CIDR notation, domain names
+# that should be excluded from proxying. IP and domain names can
+# contain port numbers.
+[ no_proxy: ]
+# Use proxy URL indicated by environment variables (HTTP_PROXY, HTTPS_PROXY, http_proxy, https_proxy, and no_proxy)
+[ proxy_from_environment: | default: false ]
+# Specifies headers to send to proxies during CONNECT requests.
+[ proxy_connect_header:
+ [ : [, ...] ] ]
+
+# Custom HTTP headers to be sent along with each request.
+# Headers that are set by Prometheus itself can't be overwritten.
+http_headers:
+  # Header name.
+  [ <string>:
+    # Header values.
+    [ values: [<string>, ...] ]
+    # Header values hidden on the configuration page.
+    [ secrets: [<secret>, ...] ]
+    # Files to read header values from.
+    [ files: [<string>, ...] ] ]
+```
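+
+For illustration only, these fields might appear inside a scrape configuration
+as follows; the credentials file path, proxy URL, and header name are
+placeholders, not recommendations:
+
+```yaml
+scrape_configs:
+  - job_name: "example-http-client-settings"
+    static_configs:
+      - targets: ["app.example.com:8443"]
+    scheme: https
+    # Bearer token read from a file instead of being inlined in the config.
+    authorization:
+      credentials_file: /etc/prometheus/secrets/example-token
+    # Route scrape requests through an egress proxy.
+    proxy_url: "http://proxy.example.com:3128"
+    no_proxy: "localhost,127.0.0.1"
+    # Send an extra header with every scrape request.
+    http_headers:
+      X-Scrape-Source:
+        values: ["prometheus-example"]
+```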
+
### `<tls_config>`
A `tls_config` allows configuring TLS connections.
@@ -543,7 +678,7 @@ A `tls_config` allows configuring TLS connections.
### `<oauth2>`
-OAuth 2.0 authentication using the client credentials grant type.
+OAuth 2.0 authentication using the client credentials or password grant type.
Prometheus fetches an access token from the specified endpoint with
the given client access and secret keys.
@@ -563,6 +698,11 @@ scopes:
token_url:
# Optional parameters to append to the token URL.
+# To use the 'password' grant type, set it in endpoint_params:
+# endpoint_params:
+# grant_type: 'password'
+# username: 'username@example.com'
+# password: 'strongpassword'
endpoint_params:
[ : ... ]
@@ -581,12 +721,31 @@ tls_config:
# Specifies headers to send to proxies during CONNECT requests.
[ proxy_connect_header:
[ : [, ...] ] ]
+
+# Custom HTTP headers to be sent along with each request.
+# Headers that are set by Prometheus itself can't be overwritten.
+http_headers:
+  # Header name.
+  [ <string>:
+    # Header values.
+    [ values: [<string>, ...] ]
+    # Header values hidden on the configuration page.
+    [ secrets: [<secret>, ...] ]
+    # Files to read header values from.
+    [ files: [<string>, ...] ] ]
```
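+
+A hedged sketch of an `oauth2` block using the password grant described above;
+the client ID, URLs, and credentials are placeholders:
+
+```yaml
+oauth2:
+  client_id: "example-client"
+  client_secret_file: /etc/prometheus/secrets/example-client-secret
+  token_url: "https://auth.example.com/oauth2/token"
+  # Switch from the default client-credentials grant to the password grant.
+  endpoint_params:
+    grant_type: "password"
+    username: "username@example.com"
+    password: "strongpassword"
+```
+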
### `<azure_sd_config>`
Azure SD configurations allow retrieving scrape targets from Azure VMs.
+The discovery requires at least the following permissions:
+
+* `Microsoft.Compute/virtualMachines/read`: Required for VM discovery
+* `Microsoft.Network/networkInterfaces/read`: Required for VM discovery
+* `Microsoft.Compute/virtualMachineScaleSets/virtualMachines/read`: Required for scale set (VMSS) discovery
+* `Microsoft.Compute/virtualMachineScaleSets/virtualMachines/networkInterfaces/read`: Required for scale set (VMSS) discovery
+
The following meta labels are available on targets during [relabeling](#relabel_config):
* `__meta_azure_machine_id`: the machine ID
@@ -634,53 +793,9 @@ subscription_id:
# instead be specified in the relabeling rule.
[ port: | default = 80 ]
-# Authentication information used to authenticate to the Azure API.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information, currently not support by Azure.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration, currently not supported by Azure.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration, currently not supported by Azure.
-oauth2:
- [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
-# TLS configuration.
-tls_config:
- [ ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
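+
+As a rough sketch, discovery via a managed identity needs only the
+subscription ID and the permissions listed above; the subscription ID and port
+below are placeholders:
+
+```yaml
+azure_sd_configs:
+  - subscription_id: "00000000-0000-0000-0000-000000000000"
+    # Use the VM's managed identity instead of explicit OAuth credentials.
+    authentication_method: ManagedIdentity
+    port: 9100
+```
+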
### `<consul_sd_config>`
@@ -693,7 +808,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_consul_address`: the address of the target
* `__meta_consul_dc`: the datacenter name for the target
* `__meta_consul_health`: the health status of the service
-* `__meta_consul_partition`: the admin partition name where the service is registered
+* `__meta_consul_partition`: the admin partition name where the service is registered
* `__meta_consul_metadata_`: each node metadata key value of the target
* `__meta_consul_node`: the node name defined for the target
* `__meta_consul_service_address`: the service address of the target
@@ -726,14 +841,17 @@ The following meta labels are available on targets during [relabeling](#relabel_
services:
[ - ]
-# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more
-# about the possible filters that can be used.
+# A Consul filter expression used to filter the catalog results.
+# See https://www.consul.io/api-docs/catalog#list-services to know more
+# about the filter expressions that can be used.
+[ filter: <string> ]
+# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`.
# An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list.
tags:
[ - ]
-# Node metadata key/value pairs to filter nodes for a given service.
+# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead.
[ node_meta:
[ : ... ] ]
@@ -747,53 +865,9 @@ tags:
# On large setup it might be a good idea to increase this value because the catalog will change all the time.
[ refresh_interval: | default = 30s ]
-# Authentication information used to authenticate to the consul server.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-oauth2:
- [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
-# TLS configuration.
-tls_config:
- [ ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
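+
+As a sketch of the newer `filter` field replacing `tags`/`node_meta`, the
+expression below is only illustrative; consult the Consul API documentation
+for the selectors that your Consul version supports:
+
+```yaml
+consul_sd_configs:
+  - server: "localhost:8500"
+    services:
+      - "web"
+    # Keep only service instances whose metadata marks them as scrapeable.
+    filter: 'ServiceMeta.metrics == "enabled"'
+```
+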
Note that the IP number and port used to scrape the targets is assembled as
@@ -833,60 +907,15 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_digitalocean_vpc`: the id of the droplet's VPC
```yaml
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information, not currently supported by DigitalOcean.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
- [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
-# TLS configuration.
-tls_config:
- [ ]
-
# The port to scrape metrics from.
[ port: | default = 80 ]
# The time after which the droplets are refreshed.
[ refresh_interval: | default = 60s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
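+
+A minimal sketch, assuming the DigitalOcean API token is kept in a file; the
+file path and port are placeholders:
+
+```yaml
+digitalocean_sd_configs:
+  - authorization:
+      credentials_file: /etc/prometheus/secrets/digitalocean-token
+    port: 9100
+    refresh_interval: 5m
+```
+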
### `<docker_sd_config>`
@@ -918,22 +947,6 @@ See below for the configuration options for Docker discovery:
# Address of the Docker daemon.
host:
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# TLS configuration.
-tls_config:
- [ ]
-
# The port to scrape metrics from, when `role` is nodes, and for discovered
# tasks and services that don't have published ports.
[ port: | default = 80 ]
@@ -941,7 +954,9 @@ tls_config:
# The host to use if the container is in host networking mode.
[ host_networking_host: | default = "localhost" ]
-# Match the first network if the container has multiple networks defined, thus avoiding collecting duplicate targets.
+# Sort all non-nil networks in ascending order by network name and
+# use the first network if the container has multiple networks defined,
+# thus avoiding the collection of duplicate targets.
[ match_first_network: | default = true ]
# Optional filters to limit the discovery process to a subset of available
@@ -955,39 +970,9 @@ tls_config:
# The time after which the containers are refreshed.
[ refresh_interval: | default = 60s ]
-# Authentication information used to authenticate to the Docker daemon.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
- [ ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1096,22 +1081,6 @@ See below for the configuration options for Docker Swarm discovery:
# Address of the Docker daemon.
host:
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# TLS configuration.
-tls_config:
- [ ]
-
# Role of the targets to retrieve. Must be `services`, `tasks`, or `nodes`.
role:
@@ -1132,39 +1101,9 @@ role:
# The time after which the service discovery data is refreshed.
[ refresh_interval: | default = 60s ]
-# Authentication information used to authenticate to the Docker daemon.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutually exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
- [ ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1279,53 +1218,9 @@ filters:
[ - name:
values: , [...] ]
-# Authentication information used to authenticate to the EC2 API.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information, currently not supported by AWS.
-basic_auth:
- [ username: ]
- [ password: ]
- [ password_file: ]
-
-# Optional `Authorization` header configuration, currently not supported by AWS.
-authorization:
- # Sets the authentication type.
- [ type: | default: Bearer ]
- # Sets the credentials. It is mutually exclusive with
- # `credentials_file`.
- [ credentials: ]
- # Sets the credentials to the credentials read from the configured file.
- # It is mutuall exclusive with `credentials`.
- [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration, currently not supported by AWS.
-oauth2:
- [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
- [ : [, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default: true ]
-
-# TLS configuration.
-tls_config:
- [ ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
```
The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1374,6 +1269,25 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_openstack_tag_`: each metadata item of the instance, with any unsupported characters converted to an underscore.
* `__meta_openstack_user_id`: the user account owning the tenant.
+#### `loadbalancer`
+
+The `loadbalancer` role discovers one target per Octavia loadbalancer with a
+`PROMETHEUS` listener. The target address defaults to the VIP address
+of the load balancer.
+
+The following meta labels are available on targets during [relabeling](#relabel_config):
+
+* `__meta_openstack_loadbalancer_availability_zone`: the availability zone of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_floating_ip`: the floating IP of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_id`: the OpenStack load balancer ID.
+* `__meta_openstack_loadbalancer_name`: the OpenStack load balancer name.
+* `__meta_openstack_loadbalancer_provider`: the Octavia provider of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_operating_status`: the operating status of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_provisioning_status`: the provisioning status of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_tags`: comma-separated list of tags of the OpenStack load balancer.
+* `__meta_openstack_loadbalancer_vip`: the VIP of the OpenStack load balancer.
+* `__meta_openstack_project_id`: the project (tenant) owning this load balancer.
+
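+For example (a sketch, not a recommendation), the operating-status label can be
+used to drop load balancers that are not currently serving traffic:
+
+```yaml
+relabel_configs:
+  - source_labels: [__meta_openstack_loadbalancer_operating_status]
+    regex: ONLINE
+    action: keep
+```
+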
See below for the configuration options for OpenStack discovery:
```yaml
@@ -1554,51 +1468,9 @@ query:
# The port to scrape metrics from.
[ port: | default = 80 ]
-# TLS configuration to connect to the PuppetDB.
-tls_config:
- [