diff --git a/.circleci/config.yml b/.circleci/config.yml index 6ad8cb8427..6471f2a3c8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ executors: # should also be updated. golang: docker: - - image: circleci/golang:1.14-node + - image: circleci/golang:1.15-node fuzzit: docker: @@ -27,8 +27,8 @@ jobs: key: v1 - restore_cache: keys: - - v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} - - v1-npm-deps- + - v3-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} + - v3-npm-deps- - run: command: make environment: @@ -41,7 +41,7 @@ jobs: GOOPTS: "-p 2" GOMAXPROCS: "2" - prometheus/check_proto: - version: "3.11.4" + version: "3.12.3" - prometheus/store_artifact: file: prometheus - prometheus/store_artifact: @@ -49,11 +49,12 @@ jobs: - go/save-cache: key: v1 - save_cache: - key: v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} + key: v3-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} paths: - - web/ui/react-app/node_modules + - /home/circleci/.cache/yarn - store_test_results: path: test-results + test_windows: executor: win/default working_directory: /go/src/github.com/prometheus/prometheus @@ -72,6 +73,28 @@ jobs: environment: GOGC: "20" GOOPTS: "-p 2 -mod=vendor" + + test_mixins: + executor: golang + steps: + - checkout + - run: go install -mod=vendor ./cmd/promtool/. 
+ - run: + command: go install -mod=readonly github.com/google/go-jsonnet/cmd/jsonnet github.com/google/go-jsonnet/cmd/jsonnetfmt github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb + working_directory: ~/project/documentation/prometheus-mixin + - run: + command: make clean + working_directory: ~/project/documentation/prometheus-mixin + - run: + command: jb install + working_directory: ~/project/documentation/prometheus-mixin + - run: + command: make + working_directory: ~/project/documentation/prometheus-mixin + - run: + command: git diff --exit-code + working_directory: ~/project/documentation/prometheus-mixin + fuzzit_regression: executor: fuzzit working_directory: /go/src/github.com/prometheus/prometheus @@ -79,6 +102,7 @@ jobs: - checkout - setup_remote_docker - run: ./fuzzit.sh local-regression + fuzzit_fuzzing: executor: fuzzit working_directory: /go/src/github.com/prometheus/prometheus @@ -101,6 +125,10 @@ workflows: filters: tags: only: /.*/ + - test_mixins: + filters: + tags: + only: /.*/ - test_windows: filters: tags: diff --git a/.dockerignore b/.dockerignore index 486d44ad9e..5eca8e1b80 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,4 +5,5 @@ data/ !.build/linux-amd64/ !.build/linux-armv7/ !.build/linux-arm64/ +!.build/linux-ppc64le/ !.build/linux-s390x/ diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index d4d404b569..a9929e392a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -2,7 +2,6 @@ name: Bug report about: Create a report to help us improve. 
title: '' -labels: kind/bug assignees: '' --- diff --git a/.github/workflows/funcbench.yml b/.github/workflows/funcbench.yml index 3cb719cb22..811b3bf2a8 100644 --- a/.github/workflows/funcbench.yml +++ b/.github/workflows/funcbench.yml @@ -1,4 +1,6 @@ -on: repository_dispatch +on: + repository_dispatch: + types: [funcbench_start] name: Funcbench Workflow jobs: run_funcbench: @@ -6,16 +8,18 @@ jobs: if: github.event.action == 'funcbench_start' runs-on: ubuntu-latest env: - AUTH_FILE: ${{ secrets.TEST_INFRA_GKE_AUTH }} + AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }} BRANCH: ${{ github.event.client_payload.BRANCH }} BENCH_FUNC_REGEX: ${{ github.event.client_payload.BENCH_FUNC_REGEX }} + PACKAGE_PATH: ${{ github.event.client_payload.PACKAGE_PATH }} GITHUB_TOKEN: ${{ secrets.PROMBOT_TOKEN }} GITHUB_ORG: prometheus GITHUB_REPO: prometheus GITHUB_STATUS_TARGET_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} LAST_COMMIT_SHA: ${{ github.event.client_payload.LAST_COMMIT_SHA }} - PROJECT_ID: macro-mile-203600 + GKE_PROJECT_ID: macro-mile-203600 PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }} + PROVIDER: gke ZONE: europe-west3-a steps: - name: Update status to pending diff --git a/.github/workflows/prombench.yml b/.github/workflows/prombench.yml index e7ae131472..d7c62bd31c 100644 --- a/.github/workflows/prombench.yml +++ b/.github/workflows/prombench.yml @@ -1,7 +1,9 @@ -on: repository_dispatch +on: + repository_dispatch: + types: [prombench_start,prombench_restart,prombench_stop] name: Prombench Workflow env: - AUTH_FILE: ${{ secrets.TEST_INFRA_GKE_AUTH }} + AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }} CLUSTER_NAME: test-infra DOMAIN_NAME: prombench.prometheus.io GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -9,8 +11,9 @@ env: GITHUB_REPO: prometheus GITHUB_STATUS_TARGET_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} LAST_COMMIT_SHA: ${{ github.event.client_payload.LAST_COMMIT_SHA }} - 
PROJECT_ID: macro-mile-203600 + GKE_PROJECT_ID: macro-mile-203600 PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }} + PROVIDER: gke RELEASE: ${{ github.event.client_payload.RELEASE }} ZONE: europe-west3-a jobs: @@ -31,7 +34,7 @@ jobs: uses: docker://prominfra/prombench:master with: args: >- - until make all_nodepools_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; + until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; make deploy; - name: Update status to failure if: failure() @@ -66,7 +69,7 @@ jobs: uses: docker://prominfra/prombench:master with: args: >- - until make all_nodepools_running; do echo "waiting for nodepools to be created"; sleep 10; done; + until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done; make clean; - name: Update status to failure if: failure() @@ -101,9 +104,9 @@ jobs: uses: docker://prominfra/prombench:master with: args: >- - until make all_nodepools_running; do echo "waiting for nodepools to be created"; sleep 10; done; + until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done; make clean; - until make all_nodepools_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; + until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; make deploy; - name: Update status to failure if: failure() diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 0000000000..8585b4a255 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,18 @@ +tasks: + - init: + make build + command: | + gp sync-done build + ./prometheus --config.file=documentation/examples/prometheus.yml + - command: | + cd web/ui/react-app + gp sync-await build + unset BROWSER + export DANGEROUSLY_DISABLE_HOST_CHECK=true + yarn start + openMode: split-right +ports: + - port: 3000 + onOpen: open-preview + - port: 9090 + onOpen: ignore diff --git a/.golangci.yml b/.golangci.yml index 
1bed66642c..c19321d677 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -4,6 +4,7 @@ run: linters: enable: + - depguard - golint issues: @@ -13,5 +14,12 @@ issues: - errcheck linters-settings: + depguard: + list-type: blacklist + include-go-root: true + packages: + - sync/atomic + packages-with-error-message: + - sync/atomic: "Use go.uber.org/atomic instead of sync/atomic" errcheck: exclude: scripts/errcheck_excludes.txt diff --git a/.promu.yml b/.promu.yml index cf5acb2499..caaa63a55d 100644 --- a/.promu.yml +++ b/.promu.yml @@ -1,7 +1,7 @@ go: # Whenever the Go version is updated here, # .circle/config.yml should also be updated. - version: 1.14 + version: 1.15 repository: path: github.com/prometheus/prometheus build: @@ -30,7 +30,6 @@ crossbuild: - linux/amd64 - linux/386 - darwin/amd64 - - darwin/386 - windows/amd64 - windows/386 - freebsd/amd64 diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d89a13c2..b85bb12713 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,65 @@ +## 2.22.0-rc.0 / 2020-10-07 + +As announced in the 2.21.0 release notes, the experimental gRPC API v2 has been +removed. + +* [CHANGE] web: Remove APIv2. #7935 +* [ENHANCEMENT] React UI: Implement missing TSDB head stats section. #7876 +* [ENHANCEMENT] UI: Add Collapse all button. #6957 +* [ENHANCEMENT] UI: Clarify alert state toggle via checkbox icon. #7936 +* [ENHANCEMENT] Add `rule_group_last_evaluation_samples` and `prometheus_tsdb_data_replay_duration_seconds` metrics. #7737 #7977 +* [ENHANCEMENT] Gracefully handle unknown WAL record types. #8004 +* [ENHANCEMENT] Issue a warning for 64 bit systems running 32 bit binaries. #8012 +* [BUGFIX] Adjust scrape timestamps to align them to the intended schedule, effectively reducing block size. Workaround for a regression in go1.14+. #7976 +* [BUGFIX] promtool: Ensure alert rules are marked as restored in unit tests. #7661 +* [BUGFIX] Eureka: Fix service discovery when compiled in 32-bit. 
#7964 +* [BUGFIX] Don't do literal regex matching optimisation when case insensitive. #8013 +* [BUGFIX] Fix classic UI sometimes running queries for instant query when in range query mode. #7984 + +## 2.21.0 / 2020-09-11 + +This release is built with Go 1.15, which deprecates [X.509 CommonName](https://golang.org/doc/go1.15#commonname) +in TLS certificates validation. + +In the unlikely case that you use the gRPC API v2 (which is limited to TSDB +admin commands), please note that we will remove this experimental API in the +next minor release 2.22. + +* [CHANGE] Disable HTTP/2 because of concerns with the Go HTTP/2 client. #7588 #7701 +* [CHANGE] PromQL: `query_log_file` path is now relative to the config file. #7701 +* [CHANGE] Promtool: Replace the tsdb command line tool by a promtool tsdb subcommand. #6088 +* [CHANGE] Rules: Label `rule_group_iterations` metric with group name. #7823 +* [FEATURE] Eureka SD: New service discovery. #3369 +* [FEATURE] Hetzner SD: New service discovery. #7822 +* [FEATURE] Kubernetes SD: Support Kubernetes EndpointSlices. #6838 +* [FEATURE] Scrape: Add per scrape-config targets limit. #7554 +* [ENHANCEMENT] Support composite durations in PromQL, config and UI, e.g. 1h30m. #7713 #7833 +* [ENHANCEMENT] DNS SD: Add SRV record target and port meta labels. #7678 +* [ENHANCEMENT] Docker Swarm SD: Support tasks and service without published ports. #7686 +* [ENHANCEMENT] PromQL: Reduce the amount of data queried by remote read when a subquery has an offset. #7667 +* [ENHANCEMENT] Promtool: Add `--time` option to query instant command. #7829 +* [ENHANCEMENT] UI: Respect the `--web.page-title` parameter in the React UI. #7607 +* [ENHANCEMENT] UI: Add duration, labels, annotations to alerts page in the React UI. #7605 +* [ENHANCEMENT] UI: Add duration on the React UI rules page, hide annotation and labels if empty. #7606 +* [BUGFIX] API: Deduplicate series in /api/v1/series. 
#7862 +* [BUGFIX] PromQL: Drop metric name in bool comparison between two instant vectors. #7819 +* [BUGFIX] PromQL: Exit with an error when time parameters can't be parsed. #7505 +* [BUGFIX] Remote read: Re-add accidentally removed tracing for remote-read requests. #7916 +* [BUGFIX] Rules: Detect extra fields in rule files. #7767 +* [BUGFIX] Rules: Disallow overwriting the metric name in the `labels` section of recording rules. #7787 +* [BUGFIX] Rules: Keep evaluation timestamp across reloads. #7775 +* [BUGFIX] Scrape: Do not stop scrapes in progress during reload. #7752 +* [BUGFIX] TSDB: Fix `chunks.HeadReadWriter: maxt of the files are not set` error. #7856 +* [BUGFIX] TSDB: Delete blocks atomically to prevent corruption when there is a panic/crash during deletion. #7772 +* [BUGFIX] Triton SD: Fix a panic when triton_sd_config is nil. #7671 +* [BUGFIX] UI: Fix react UI bug with series going on and off. #7804 +* [BUGFIX] UI: Fix styling bug for target labels with special names in React UI. #7902 +* [BUGFIX] Web: Stop CMUX and GRPC servers even with stale connections, preventing the server to stop on SIGTERM. #7810 + +## 2.20.1 / 2020-08-05 + +* [BUGFIX] SD: Reduce the Consul watch timeout to 2m and adjust the request timeout accordingly. #7724 + ## 2.20.0 / 2020-07-22 This release changes WAL compression from opt-in to default. WAL compression will prevent a downgrade to v2.10 or earlier without deleting the WAL. Disable WAL compression explicitly by setting the command line flag `--no-storage.tsdb.wal-compression` if you require downgrading to v2.10 or earlier. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7bc20eccb1..2691af055f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,6 +28,8 @@ Should you wish to work on an issue, please claim it first by commenting on the Please check the [`low-hanging-fruit`](https://github.com/prometheus/prometheus/issues?q=is%3Aissue+is%3Aopen+label%3A%22low+hanging+fruit%22) label to find issues that are good for getting started. If you have questions about one of the issues, with or without the tag, please comment on them and one of the maintainers will clarify it. For a quicker response, contact us over [IRC](https://prometheus.io/community). +You can [spin up a prebuilt dev environment](https://gitpod.io/#https://github.com/prometheus/prometheus) using Gitpod.io. + For complete instructions on how to compile see: [Building From Source](https://github.com/prometheus/prometheus#building-from-source) For quickly compiling and testing your changes do: @@ -56,7 +58,7 @@ All our issues are regularly tagged so that you can also filter down the issues ## Dependency management -The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.13 or greater installed. +The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. All dependencies are vendored in the `vendor/` directory. 
diff --git a/MAINTAINERS.md b/MAINTAINERS.md index c9178d1f9b..da3391fc8b 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -8,7 +8,7 @@ * `prometheus-mixin`: @beorn7 * `storage` * `remote`: @csmarchbanks, @cstyan, @bwplotka -* `tsdb`: @codesome, @krasi-georgiev +* `tsdb`: @codesome, @krasi-georgiev, @bwplotka * `web` * `ui`: @juliusv * `Makefile` and related build configuration: @simonpasquier, @SuperQ diff --git a/Makefile b/Makefile index 4e6ec6657f..810f7e4605 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ # limitations under the License. # Needs to be defined before including Makefile.common to auto-generate targets -DOCKER_ARCHS ?= amd64 armv7 arm64 s390x +DOCKER_ARCHS ?= amd64 armv7 arm64 ppc64le s390x REACT_APP_PATH = web/ui/react-app REACT_APP_SOURCE_FILES = $(wildcard $(REACT_APP_PATH)/public/* $(REACT_APP_PATH)/src/* $(REACT_APP_PATH)/tsconfig.json) diff --git a/Makefile.common b/Makefile.common index 9320176ca2..3f3d02cba7 100644 --- a/Makefile.common +++ b/Makefile.common @@ -78,7 +78,7 @@ ifneq ($(shell which gotestsum),) endif endif -PROMU_VERSION ?= 0.5.0 +PROMU_VERSION ?= 0.6.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz GOLANGCI_LINT := diff --git a/README.md b/README.md index 4c2fd6f726..f529864107 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/prometheus)](https://goreportcard.com/report/github.com/prometheus/prometheus) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486) [![fuzzit](https://app.fuzzit.dev/badge?org_id=prometheus&branch=master)](https://fuzzit.dev) +[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus) Visit [prometheus.io](https://prometheus.io) for the 
full documentation, examples and guides. @@ -18,7 +19,7 @@ to be true. Prometheus's main distinguishing features as compared to other monitoring systems are: - a **multi-dimensional** data model (timeseries defined by metric name and set of key/value dimensions) -- a **flexible query language** to leverage this dimensionality +- PromQL, a **powerful and flexible query language** to leverage this dimensionality - no dependency on distributed storage; **single server nodes are autonomous** - timeseries collection happens via a **pull model** over HTTP - **pushing timeseries** is supported via an intermediary gateway @@ -43,8 +44,6 @@ is the recommended way of installing Prometheus. See the [Installing](https://prometheus.io/docs/introduction/install/) chapter in the documentation for all the details. -Debian packages [are available](https://packages.debian.org/sid/net/prometheus). - ### Docker images Docker images are available on [Quay.io](https://quay.io/repository/prometheus/prometheus) or [Docker Hub](https://hub.docker.com/r/prom/prometheus/). @@ -94,6 +93,7 @@ The Makefile provides several targets: * *format*: format the source code * *vet*: check the source code for common errors * *docker*: build a docker container for the current `HEAD` + * *assets*: build the new experimental React UI ## React UI Development diff --git a/RELEASE.md b/RELEASE.md index bfa798f10d..daba3ac65b 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -38,7 +38,7 @@ The release shepherd is responsible for the entire release series of a minor rel * We aim to keep the master branch in a working state at all times. In principle, it should be possible to cut a release from master at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of master. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. 
On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release. * On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-.` starting at the commit tagged for the pre-release. In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release. * With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release. -* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.). +* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.). See the next section for details on cutting an individual release. @@ -62,13 +62,7 @@ Maintaining the release branches for older minor releases happens on a best effo ### 0. Updating dependencies -A few days before a major or minor release, consider updating the dependencies: - -``` -make update-go-deps -git add go.mod go.sum vendor -git commit -m "Update dependencies" -``` +A few days before a major or minor release, consider updating the dependencies. Then create a pull request against the master branch. @@ -81,6 +75,32 @@ you can skip the dependency update or only update select dependencies. In such a case, you have to create an issue or pull request in the GitHub project for later follow-up. 
+#### Updating Go dependencies + +``` +make update-go-deps +git add go.mod go.sum vendor +git commit -m "Update dependencies" +``` + +#### Updating React dependencies + +Either upgrade the dependencies within their existing version constraints as specified in the `package.json` file (see https://docs.npmjs.com/files/package.json#dependencies): + +``` +cd web/ui/react-app +yarn upgrade +git add yarn.lock +``` + +Or alternatively, update all dependencies to their latest major versions. This is potentially more disruptive and will require more follow-up fixes, but should be done from time to time (use your best judgement): + +``` +cd web/ui/react-app +yarn upgrade --latest +git add package.json yarn.lock +``` + ### 1. Prepare your release At the start of a new major or minor release cycle create the corresponding release branch based on the master branch. For example if we're releasing `2.17.0` and the previous stable release is `2.16.0` we need to create a `release-2.17` branch. Note that all releases are handled in protected release branches, see the above `Branch management and versioning` section. Release candidates and patch releases for any given major or minor release happen in the same `release-.` branch. Do not create `release-` for patch or release candidate releases. @@ -132,6 +152,4 @@ For release candidate versions (`v2.16.0-rc.0`), run the benchmark for 3 days us If the release has happened in the latest release branch, merge the changes into master. -To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration (note: only actually merge this for final releases, not for pre-releases like a release candidate). - Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration. 
diff --git a/VERSION b/VERSION index 7329e21c3b..1ed0304cd5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.20.0 +2.22.0-rc.0 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index e2fb6b26f8..52b646d181 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -19,6 +19,7 @@ import ( "fmt" "io" "math" + "math/bits" "net" "net/http" _ "net/http/pprof" // Comment this line to disable pprof endpoint. @@ -30,7 +31,6 @@ import ( "runtime" "strings" "sync" - "sync/atomic" "syscall" "time" @@ -47,13 +47,14 @@ import ( "github.com/prometheus/common/version" jcfg "github.com/uber/jaeger-client-go/config" jprom "github.com/uber/jaeger-lib/metrics/prometheus" + "go.uber.org/atomic" kingpin "gopkg.in/alecthomas/kingpin.v2" - "k8s.io/klog" + klog "k8s.io/klog" + klogv2 "k8s.io/klog/v2" promlogflag "github.com/prometheus/common/promlog/flag" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" - sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/logging" @@ -67,6 +68,8 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/web" + + _ "github.com/prometheus/prometheus/discovery/install" // Register service discovery implementations. ) var ( @@ -238,6 +241,9 @@ func main() { a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) + a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to 2ms to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). 
+ Hidden().Default("true").BoolVar(&scrape.AlignScrapeTimestamps) + a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications."). Default("10000").IntVar(&cfg.notifier.QueueCapacity) @@ -284,6 +290,14 @@ func main() { level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "err", err) os.Exit(2) } + // Now that the validity of the config is established, set the config + // success metrics accordingly, although the config isn't really loaded + // yet. This will happen later (including setting these metrics again), + // but if we don't do it now, the metrics will stay at zero until the + // startup procedure is complete, which might take long enough to + // trigger alerts about an invalid config. + configSuccess.Set(1) + configSuccessTime.SetToCurrentTime() cfg.web.ReadTimeout = time.Duration(cfg.webTimeout) // Default -web.route-prefix to path of -web.external-url. @@ -341,8 +355,14 @@ func main() { // Above level 6, the k8s client would log bearer tokens in clear-text. klog.ClampLevel(6) klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) + klogv2.ClampLevel(6) + klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime")) level.Info(logger).Log("msg", "Starting Prometheus", "version", version.Info()) + if bits.UintSize < 64 { + level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. 
Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) + } + level.Info(logger).Log("build_context", version.BuildContext()) level.Info(logger).Log("host_details", prom_runtime.Uname()) level.Info(logger).Log("fd_limits", prom_runtime.FdLimits()) @@ -436,56 +456,73 @@ func main() { conntrack.DialWithTracing(), ) - reloaders := []func(cfg *config.Config) error{ - remoteStorage.ApplyConfig, - webHandler.ApplyConfig, - func(cfg *config.Config) error { - if cfg.GlobalConfig.QueryLogFile == "" { - queryEngine.SetQueryLogger(nil) - return nil - } - - l, err := logging.NewJSONFileLogger(cfg.GlobalConfig.QueryLogFile) - if err != nil { - return err - } - queryEngine.SetQueryLogger(l) - return nil - }, - // The Scrape and notifier managers need to reload before the Discovery manager as - // they need to read the most updated config when receiving the new targets list. - scrapeManager.ApplyConfig, - func(cfg *config.Config) error { - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig - } - return discoveryManagerScrape.ApplyConfig(c) - }, - notifierManager.ApplyConfig, - func(cfg *config.Config) error { - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for k, v := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { - c[k] = v.ServiceDiscoveryConfig - } - return discoveryManagerNotify.ApplyConfig(c) - }, - func(cfg *config.Config) error { - // Get all rule files matching the configuration paths. - var files []string - for _, pat := range cfg.RuleFiles { - fs, err := filepath.Glob(pat) - if err != nil { - // The only error can be a bad pattern. 
- return errors.Wrapf(err, "error retrieving rule files for %s", pat) + reloaders := []reloader{ + { + name: "remote_storage", + reloader: remoteStorage.ApplyConfig, + }, { + name: "web_handler", + reloader: webHandler.ApplyConfig, + }, { + name: "query_engine", + reloader: func(cfg *config.Config) error { + if cfg.GlobalConfig.QueryLogFile == "" { + queryEngine.SetQueryLogger(nil) + return nil } - files = append(files, fs...) - } - return ruleManager.Update( - time.Duration(cfg.GlobalConfig.EvaluationInterval), - files, - cfg.GlobalConfig.ExternalLabels, - ) + + l, err := logging.NewJSONFileLogger(cfg.GlobalConfig.QueryLogFile) + if err != nil { + return err + } + queryEngine.SetQueryLogger(l) + return nil + }, + }, { + // The Scrape and notifier managers need to reload before the Discovery manager as + // they need to read the most updated config when receiving the new targets list. + name: "scrape", + reloader: scrapeManager.ApplyConfig, + }, { + name: "scrape_sd", + reloader: func(cfg *config.Config) error { + c := make(map[string]discovery.Configs) + for _, v := range cfg.ScrapeConfigs { + c[v.JobName] = v.ServiceDiscoveryConfigs + } + return discoveryManagerScrape.ApplyConfig(c) + }, + }, { + name: "notify", + reloader: notifierManager.ApplyConfig, + }, { + name: "notify_sd", + reloader: func(cfg *config.Config) error { + c := make(map[string]discovery.Configs) + for k, v := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { + c[k] = v.ServiceDiscoveryConfigs + } + return discoveryManagerNotify.ApplyConfig(c) + }, + }, { + name: "rules", + reloader: func(cfg *config.Config) error { + // Get all rule files matching the configuration paths. + var files []string + for _, pat := range cfg.RuleFiles { + fs, err := filepath.Glob(pat) + if err != nil { + // The only error can be a bad pattern. + return errors.Wrapf(err, "error retrieving rule files for %s", pat) + } + files = append(files, fs...) 
+ } + return ruleManager.Update( + time.Duration(cfg.GlobalConfig.EvaluationInterval), + files, + cfg.GlobalConfig.ExternalLabels, + ) + }, }, } @@ -696,7 +733,13 @@ func main() { return errors.Wrapf(err, "opening storage failed") } - level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath)) + switch fsType := prom_runtime.Statfs(cfg.localStoragePath); fsType { + case "NFS_SUPER_MAGIC": + level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + default: + level.Info(logger).Log("fs_type", fsType) + } + level.Info(logger).Log("msg", "TSDB started") level.Debug(logger).Log("msg", "TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, @@ -801,21 +844,28 @@ func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, } type safePromQLNoStepSubqueryInterval struct { - value int64 + value atomic.Int64 } func durationToInt64Millis(d time.Duration) int64 { return int64(d / time.Millisecond) } func (i *safePromQLNoStepSubqueryInterval) Set(ev model.Duration) { - atomic.StoreInt64(&i.value, durationToInt64Millis(time.Duration(ev))) + i.value.Store(durationToInt64Millis(time.Duration(ev))) } func (i *safePromQLNoStepSubqueryInterval) Get(int64) int64 { - return atomic.LoadInt64(&i.value) + return i.value.Load() } -func reloadConfig(filename string, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...func(*config.Config) error) (err error) { +type reloader struct { + name string + reloader func(*config.Config) error +} + +func reloadConfig(filename string, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) { + start := time.Now() + timings := []interface{}{} level.Info(logger).Log("msg", "Loading configuration file", "filename", filename) defer func() { 
@@ -834,17 +884,20 @@ func reloadConfig(filename string, logger log.Logger, noStepSuqueryInterval *saf failed := false for _, rl := range rls { - if err := rl(conf); err != nil { + rstart := time.Now() + if err := rl.reloader(conf); err != nil { level.Error(logger).Log("msg", "Failed to apply configuration", "err", err) failed = true } + timings = append(timings, rl.name, time.Since(rstart)) } if failed { return errors.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) } noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) - level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename) + l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} + level.Info(logger).Log(append(l, timings...)...) return nil } @@ -984,9 +1037,9 @@ func (s *readyStorage) ChunkQuerier(ctx context.Context, mint, maxt int64) (stor } // Appender implements the Storage interface. 
-func (s *readyStorage) Appender() storage.Appender { +func (s *readyStorage) Appender(ctx context.Context) storage.Appender { if x := s.get(); x != nil { - return x.Appender() + return x.Appender(ctx) } return notReadyAppender{} } diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index f556437b26..a8c907f03f 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -263,7 +263,7 @@ func TestTimeMetrics(t *testing.T) { "prometheus_tsdb_head_max_time_seconds", )) - app := db.Appender() + app := db.Appender(context.Background()) _, err = app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 1000, 1) testutil.Ok(t, err) _, err = app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 2000, 1) diff --git a/cmd/promtool/archive.go b/cmd/promtool/archive.go index 783d8294f4..520c26b63f 100644 --- a/cmd/promtool/archive.go +++ b/cmd/promtool/archive.go @@ -21,7 +21,7 @@ import ( "github.com/pkg/errors" ) -const filePerm = 0644 +const filePerm = 0666 type tarGzFileWriter struct { tarWriter *tar.Writer diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 8197047bcf..e389bf75bb 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -41,7 +41,11 @@ import ( "gopkg.in/alecthomas/kingpin.v2" "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/file" + "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/pkg/rulefmt" + + _ "github.com/prometheus/prometheus/discovery/install" // Register service discovery implementations. 
) func main() { @@ -67,9 +71,11 @@ func main() { queryCmd := app.Command("query", "Run query against a Prometheus server.") queryCmdFmt := queryCmd.Flag("format", "Output format of the query.").Short('o').Default("promql").Enum("promql", "json") + queryInstantCmd := queryCmd.Command("instant", "Run instant query.") - queryServer := queryInstantCmd.Arg("server", "Prometheus server to query.").Required().String() - queryExpr := queryInstantCmd.Arg("expr", "PromQL query expression.").Required().String() + queryInstantServer := queryInstantCmd.Arg("server", "Prometheus server to query.").Required().String() + queryInstantExpr := queryInstantCmd.Arg("expr", "PromQL query expression.").Required().String() + queryInstantTime := queryInstantCmd.Flag("time", "Query evaluation time (RFC3339 or Unix timestamp).").String() queryRangeCmd := queryCmd.Command("range", "Run range query.") queryRangeServer := queryRangeCmd.Arg("server", "Prometheus server to query.").Required().String() @@ -165,7 +171,7 @@ func main() { os.Exit(CheckMetrics()) case queryInstantCmd.FullCommand(): - os.Exit(QueryInstant(*queryServer, *queryExpr, p)) + os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p)) case queryRangeCmd.FullCommand(): os.Exit(QueryRange(*queryRangeServer, *queryRangeHeaders, *queryRangeExpr, *queryRangeBegin, *queryRangeEnd, *queryRangeStep, p)) @@ -282,24 +288,25 @@ func checkConfig(filename string) ([]string, error) { return nil, err } - for _, kd := range scfg.ServiceDiscoveryConfig.KubernetesSDConfigs { - if err := checkTLSConfig(kd.HTTPClientConfig.TLSConfig); err != nil { - return nil, err - } - } - - for _, filesd := range scfg.ServiceDiscoveryConfig.FileSDConfigs { - for _, file := range filesd.Files { - files, err := filepath.Glob(file) - if err != nil { + for _, c := range scfg.ServiceDiscoveryConfigs { + switch c := c.(type) { + case *kubernetes.SDConfig: + if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig); err != nil { return nil, err 
} - if len(files) != 0 { - // There was at least one match for the glob and we can assume checkFileExists - // for all matches would pass, we can continue the loop. - continue + case *file.SDConfig: + for _, file := range c.Files { + files, err := filepath.Glob(file) + if err != nil { + return nil, err + } + if len(files) != 0 { + // There was at least one match for the glob and we can assume checkFileExists + // for all matches would pass, we can continue the loop. + continue + } + fmt.Printf(" WARNING: file %q for file_sd in scrape job %q does not exist\n", file, scfg.JobName) } - fmt.Printf(" WARNING: file %q for file_sd in scrape job %q does not exist\n", file, scfg.JobName) } } } @@ -441,7 +448,7 @@ func CheckMetrics() int { } // QueryInstant performs an instant query against a Prometheus server. -func QueryInstant(url, query string, p printer) int { +func QueryInstant(url, query, evalTime string, p printer) int { config := api.Config{ Address: url, } @@ -453,11 +460,20 @@ func QueryInstant(url, query string, p printer) int { return 1 } + eTime := time.Now() + if evalTime != "" { + eTime, err = parseTime(evalTime) + if err != nil { + fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err) + return 1 + } + } + // Run query against client. api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, _, err := api.Query(ctx, query, time.Now()) // Ignoring warnings for now. + val, _, err := api.Query(ctx, query, eTime) // Ignoring warnings for now. 
cancel() if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 1d28cf2927..a40ff0a860 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -19,6 +19,8 @@ import ( "net/http/httptest" "testing" "time" + + "github.com/prometheus/prometheus/util/testutil" ) func TestQueryRange(t *testing.T) { @@ -27,41 +29,31 @@ func TestQueryRange(t *testing.T) { p := &promqlPrinter{} exitCode := QueryRange(s.URL, map[string]string{}, "up", "0", "300", 0, p) - expectedPath := "/api/v1/query_range" - gotPath := getRequest().URL.Path - if gotPath != expectedPath { - t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) - } + testutil.Equals(t, "/api/v1/query_range", getRequest().URL.Path) form := getRequest().Form - actual := form.Get("query") - if actual != "up" { - t.Errorf("unexpected value %s for query", actual) - } - actual = form.Get("step") - if actual != "1" { - t.Errorf("unexpected value %s for step", actual) - } - if exitCode > 0 { - t.Error() - } + testutil.Equals(t, "up", form.Get("query")) + testutil.Equals(t, "1", form.Get("step")) + testutil.Equals(t, 0, exitCode) exitCode = QueryRange(s.URL, map[string]string{}, "up", "0", "300", 10*time.Millisecond, p) - gotPath = getRequest().URL.Path - if gotPath != expectedPath { - t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) - } + testutil.Equals(t, "/api/v1/query_range", getRequest().URL.Path) form = getRequest().Form - actual = form.Get("query") - if actual != "up" { - t.Errorf("unexpected value %s for query", actual) - } - actual = form.Get("step") - if actual != "0.01" { - t.Errorf("unexpected value %s for step", actual) - } - if exitCode > 0 { - t.Error() - } + testutil.Equals(t, "up", form.Get("query")) + testutil.Equals(t, "0.01", form.Get("step")) + testutil.Equals(t, 0, exitCode) +} + +func TestQueryInstant(t *testing.T) { + s, getRequest := mockServer(200, `{"status": "success", 
"data": {"resultType": "vector", "result": []}}`) + defer s.Close() + + p := &promqlPrinter{} + exitCode := QueryInstant(s.URL, "up", "300", p) + testutil.Equals(t, "/api/v1/query", getRequest().URL.Path) + form := getRequest().Form + testutil.Equals(t, "up", form.Get("query")) + testutil.Equals(t, "300", form.Get("time")) + testutil.Equals(t, 0, exitCode) } func mockServer(code int, body string) (*httptest.Server, func() *http.Request) { diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml index f8fb2d758a..e25a09da5a 100644 --- a/cmd/promtool/testdata/unittest.yml +++ b/cmd/promtool/testdata/unittest.yml @@ -8,6 +8,14 @@ tests: input_series: - series: 'up{job="prometheus", instance="localhost:9090"}' values: "0+0x1440" + + promql_expr_test: + - expr: count(ALERTS) by (alertname, alertstate) + eval_time: 4m + exp_samples: + - labels: '{alertname="InstanceDown",alertstate="pending"}' + value: 1 + alert_rule_test: - eval_time: 1d alertname: InstanceDown diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 9667f7c76d..cb692e9ef8 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -30,6 +30,7 @@ import ( "text/tabwriter" "time" + "github.com/alecthomas/units" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/labels" @@ -199,7 +200,7 @@ func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount in total := uint64(0) for i := 0; i < scrapeCount; i++ { - app := b.storage.Appender() + app := b.storage.Appender(context.TODO()) ts += timeDelta for _, s := range scrape { @@ -363,12 +364,12 @@ func printBlocks(blocks []tsdb.BlockReader, humanReadable bool) { tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) defer tw.Flush() - fmt.Fprintln(tw, "BLOCK ULID\tMIN TIME\tMAX TIME\tDURATION\tNUM SAMPLES\tNUM CHUNKS\tNUM SERIES") + fmt.Fprintln(tw, "BLOCK ULID\tMIN TIME\tMAX TIME\tDURATION\tNUM SAMPLES\tNUM CHUNKS\tNUM SERIES\tSIZE") for _, b := range blocks { meta := 
b.Meta() fmt.Fprintf(tw, - "%v\t%v\t%v\t%v\t%v\t%v\t%v\n", + "%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n", meta.ULID, getFormatedTime(meta.MinTime, humanReadable), getFormatedTime(meta.MaxTime, humanReadable), @@ -376,6 +377,7 @@ func printBlocks(blocks []tsdb.BlockReader, humanReadable bool) { meta.Stats.NumSamples, meta.Stats.NumChunks, meta.Stats.NumSeries, + getFormatedBytes(b.Size(), humanReadable), ) } } @@ -387,6 +389,13 @@ func getFormatedTime(timestamp int64, humanReadable bool) string { return strconv.FormatInt(timestamp, 10) } +func getFormatedBytes(bytes int64, humanReadable bool) string { + if humanReadable { + return units.Base2Bytes(bytes).String() + } + return strconv.FormatInt(bytes, 10) +} + func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) { db, err := tsdb.OpenDBReadOnly(path, nil) if err != nil { diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 42368b3915..cbdb8931ef 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -221,6 +221,16 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou // Current index in alertEvalTimes what we are looking at. curr := 0 + for _, g := range groups { + for _, r := range g.Rules() { + if alertRule, ok := r.(*rules.AlertingRule); ok { + // Mark alerting rules as restored, to ensure the ALERTS timeseries is + // created when they run. + alertRule.SetRestored(true) + } + } + } + var errs []error for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) { // Collects the alerts asked for unit testing. 
diff --git a/config/config.go b/config/config.go index f8866ff5d4..223c8abfe2 100644 --- a/config/config.go +++ b/config/config.go @@ -23,11 +23,11 @@ import ( "time" "github.com/pkg/errors" - config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/config" "github.com/prometheus/common/model" yaml "gopkg.in/yaml.v2" - sd_config "github.com/prometheus/prometheus/discovery/config" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/relabel" ) @@ -48,7 +48,6 @@ func Load(s string) (*Config, error) { if err != nil { return nil, err } - cfg.original = s return cfg, nil } @@ -62,7 +61,7 @@ func LoadFile(filename string) (*Config, error) { if err != nil { return nil, errors.Wrapf(err, "parsing YAML file %s", filename) } - resolveFilepaths(filepath.Dir(filename), cfg) + cfg.SetDirectory(filepath.Dir(filename)) return cfg, nil } @@ -105,16 +104,16 @@ var ( // DefaultQueueConfig is the default remote queue configuration. DefaultQueueConfig = QueueConfig{ - // With a maximum of 1000 shards, assuming an average of 100ms remote write - // time and 100 samples per batch, we will be able to push 1M samples/s. - MaxShards: 1000, + // With a maximum of 200 shards, assuming an average of 100ms remote write + // time and 500 samples per batch, we will be able to push 1M samples/s. + MaxShards: 200, MinShards: 1, - MaxSamplesPerSend: 100, + MaxSamplesPerSend: 500, - // Each shard will have a max of 500 samples pending in it's channel, plus the pending - // samples that have been enqueued. Theoretically we should only ever have about 600 samples - // per shard pending. At 1000 shards that's 600k. - Capacity: 500, + // Each shard will have a max of 2500 samples pending in its channel, plus the pending + // samples that have been enqueued. Theoretically we should only ever have about 3000 samples + // per shard pending. At 200 shards that's 600k. 
+ Capacity: 2500, BatchSendDeadline: model.Duration(5 * time.Second), // Backoff times for retrying a batch of samples on recoverable errors. @@ -137,80 +136,23 @@ type Config struct { RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"` RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"` - - // original is the input from which the config was parsed. - original string } -// resolveFilepaths joins all relative paths in a configuration -// with a given base directory. -func resolveFilepaths(baseDir string, cfg *Config) { - join := func(fp string) string { - if len(fp) > 0 && !filepath.IsAbs(fp) { - fp = filepath.Join(baseDir, fp) - } - return fp +// SetDirectory joins any relative file paths with dir. +func (c *Config) SetDirectory(dir string) { + c.GlobalConfig.SetDirectory(dir) + c.AlertingConfig.SetDirectory(dir) + for i, file := range c.RuleFiles { + c.RuleFiles[i] = config.JoinDir(dir, file) } - - for i, rf := range cfg.RuleFiles { - cfg.RuleFiles[i] = join(rf) + for _, c := range c.ScrapeConfigs { + c.SetDirectory(dir) } - - tlsPaths := func(cfg *config_util.TLSConfig) { - cfg.CAFile = join(cfg.CAFile) - cfg.CertFile = join(cfg.CertFile) - cfg.KeyFile = join(cfg.KeyFile) + for _, c := range c.RemoteWriteConfigs { + c.SetDirectory(dir) } - clientPaths := func(scfg *config_util.HTTPClientConfig) { - if scfg.BasicAuth != nil { - scfg.BasicAuth.PasswordFile = join(scfg.BasicAuth.PasswordFile) - } - scfg.BearerTokenFile = join(scfg.BearerTokenFile) - tlsPaths(&scfg.TLSConfig) - } - sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) { - for _, kcfg := range cfg.KubernetesSDConfigs { - clientPaths(&kcfg.HTTPClientConfig) - } - for _, mcfg := range cfg.MarathonSDConfigs { - mcfg.AuthTokenFile = join(mcfg.AuthTokenFile) - clientPaths(&mcfg.HTTPClientConfig) - } - for _, consulcfg := range cfg.ConsulSDConfigs { - tlsPaths(&consulcfg.TLSConfig) - } - for _, digitaloceancfg := range cfg.DigitalOceanSDConfigs { - 
clientPaths(&digitaloceancfg.HTTPClientConfig) - } - for _, dockerswarmcfg := range cfg.DockerSwarmSDConfigs { - clientPaths(&dockerswarmcfg.HTTPClientConfig) - } - for _, cfg := range cfg.OpenstackSDConfigs { - tlsPaths(&cfg.TLSConfig) - } - for _, cfg := range cfg.TritonSDConfigs { - tlsPaths(&cfg.TLSConfig) - } - for _, filecfg := range cfg.FileSDConfigs { - for i, fn := range filecfg.Files { - filecfg.Files[i] = join(fn) - } - } - } - - for _, cfg := range cfg.ScrapeConfigs { - clientPaths(&cfg.HTTPClientConfig) - sdPaths(&cfg.ServiceDiscoveryConfig) - } - for _, cfg := range cfg.AlertingConfig.AlertmanagerConfigs { - clientPaths(&cfg.HTTPClientConfig) - sdPaths(&cfg.ServiceDiscoveryConfig) - } - for _, cfg := range cfg.RemoteReadConfigs { - clientPaths(&cfg.HTTPClientConfig) - } - for _, cfg := range cfg.RemoteWriteConfigs { - clientPaths(&cfg.HTTPClientConfig) + for _, c := range c.RemoteReadConfigs { + c.SetDirectory(dir) } } @@ -311,6 +253,11 @@ type GlobalConfig struct { ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` } +// SetDirectory joins any relative file paths with dir. +func (c *GlobalConfig) SetDirectory(dir string) { + c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile) +} + // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // Create a clean global config as the previous one was already populated @@ -379,14 +326,17 @@ type ScrapeConfig struct { MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. Scheme string `yaml:"scheme,omitempty"` - // More than this many samples post metric-relabelling will cause the scrape to fail. + // More than this many samples post metric-relabeling will cause the scrape to fail. SampleLimit uint `yaml:"sample_limit,omitempty"` + // More than this many targets after the target relabeling will cause the + // scrapes to fail. 
+ TargetLimit uint `yaml:"target_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. - ServiceDiscoveryConfig sd_config.ServiceDiscoveryConfig `yaml:",inline"` - HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` + ServiceDiscoveryConfigs discovery.Configs `yaml:"-"` + HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` // List of target relabel configurations. RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` @@ -394,12 +344,16 @@ type ScrapeConfig struct { MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"` } +// SetDirectory joins any relative file paths with dir. +func (c *ScrapeConfig) SetDirectory(dir string) { + c.ServiceDiscoveryConfigs.SetDirectory(dir) + c.HTTPClientConfig.SetDirectory(dir) +} + // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultScrapeConfig - type plain ScrapeConfig - err := unmarshal((*plain)(c)) - if err != nil { + if err := discovery.UnmarshalYAMLWithInlineConfigs(c, unmarshal); err != nil { return err } if len(c.JobName) == 0 { @@ -413,21 +367,10 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } - // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. - // We cannot make it a pointer as the parser panics for inlined pointer structs. - // Thus we just do its validation here. - if err := c.ServiceDiscoveryConfig.Validate(); err != nil { - return err - } - // Check for users putting URLs in target groups. 
if len(c.RelabelConfigs) == 0 { - for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { - for _, t := range tg.Targets { - if err := CheckTargetAddress(t[model.AddressLabel]); err != nil { - return err - } - } + if err := checkStaticTargets(c.ServiceDiscoveryConfigs); err != nil { + return err } } @@ -442,21 +385,27 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { } } - // Add index to the static config target groups for unique identification - // within scrape pool. - for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { - tg.Source = fmt.Sprintf("%d", i) - } - return nil } +// MarshalYAML implements the yaml.Marshaler interface. +func (c *ScrapeConfig) MarshalYAML() (interface{}, error) { + return discovery.MarshalYAMLWithInlineConfigs(c) +} + // AlertingConfig configures alerting and alertmanager related configs. type AlertingConfig struct { AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` AlertmanagerConfigs AlertmanagerConfigs `yaml:"alertmanagers,omitempty"` } +// SetDirectory joins any relative file paths with dir. +func (c *AlertingConfig) SetDirectory(dir string) { + for _, c := range c.AlertmanagerConfigs { + c.SetDirectory(dir) + } +} + // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // Create a clean global config as the previous one was already populated @@ -526,8 +475,8 @@ type AlertmanagerConfig struct { // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. - ServiceDiscoveryConfig sd_config.ServiceDiscoveryConfig `yaml:",inline"` - HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` + ServiceDiscoveryConfigs discovery.Configs `yaml:"-"` + HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` // The URL scheme to use when talking to Alertmanagers. 
Scheme string `yaml:"scheme,omitempty"` @@ -543,11 +492,16 @@ type AlertmanagerConfig struct { RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` } +// SetDirectory joins any relative file paths with dir. +func (c *AlertmanagerConfig) SetDirectory(dir string) { + c.ServiceDiscoveryConfigs.SetDirectory(dir) + c.HTTPClientConfig.SetDirectory(dir) +} + // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultAlertmanagerConfig - type plain AlertmanagerConfig - if err := unmarshal((*plain)(c)); err != nil { + if err := discovery.UnmarshalYAMLWithInlineConfigs(c, unmarshal); err != nil { return err } @@ -558,21 +512,10 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er return err } - // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. - // We cannot make it a pointer as the parser panics for inlined pointer structs. - // Thus we just do its validation here. - if err := c.ServiceDiscoveryConfig.Validate(); err != nil { - return err - } - // Check for users putting URLs in target groups. if len(c.RelabelConfigs) == 0 { - for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { - for _, t := range tg.Targets { - if err := CheckTargetAddress(t[model.AddressLabel]); err != nil { - return err - } - } + if err := checkStaticTargets(c.ServiceDiscoveryConfigs); err != nil { + return err } } @@ -582,12 +525,28 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er } } - // Add index to the static config target groups for unique identification - // within scrape pool. - for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { - tg.Source = fmt.Sprintf("%d", i) - } + return nil +} +// MarshalYAML implements the yaml.Marshaler interface. 
+func (c *AlertmanagerConfig) MarshalYAML() (interface{}, error) { + return discovery.MarshalYAMLWithInlineConfigs(c) +} + +func checkStaticTargets(configs discovery.Configs) error { + for _, cfg := range configs { + sc, ok := cfg.(discovery.StaticConfig) + if !ok { + continue + } + for _, tg := range sc { + for _, t := range tg.Targets { + if err := CheckTargetAddress(t[model.AddressLabel]); err != nil { + return err + } + } + } + } return nil } @@ -600,29 +559,22 @@ func CheckTargetAddress(address model.LabelValue) error { return nil } -// ClientCert contains client cert credentials. -type ClientCert struct { - Cert string `yaml:"cert"` - Key config_util.Secret `yaml:"key"` -} - -// FileSDConfig is the configuration for file based discovery. -type FileSDConfig struct { - Files []string `yaml:"files"` - RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` -} - // RemoteWriteConfig is the configuration for writing to remote storage. type RemoteWriteConfig struct { - URL *config_util.URL `yaml:"url"` + URL *config.URL `yaml:"url"` RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` WriteRelabelConfigs []*relabel.Config `yaml:"write_relabel_configs,omitempty"` Name string `yaml:"name,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. - HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` - QueueConfig QueueConfig `yaml:"queue_config,omitempty"` + HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` + QueueConfig QueueConfig `yaml:"queue_config,omitempty"` +} + +// SetDirectory joins any relative file paths with dir. +func (c *RemoteWriteConfig) SetDirectory(dir string) { + c.HTTPClientConfig.SetDirectory(dir) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -673,20 +625,25 @@ type QueueConfig struct { // RemoteReadConfig is the configuration for reading from remote storage. 
type RemoteReadConfig struct { - URL *config_util.URL `yaml:"url"` - RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` - ReadRecent bool `yaml:"read_recent,omitempty"` - Name string `yaml:"name,omitempty"` + URL *config.URL `yaml:"url"` + RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` + ReadRecent bool `yaml:"read_recent,omitempty"` + Name string `yaml:"name,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. - HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` + HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` // RequiredMatchers is an optional list of equality matchers which have to // be present in a selector to query the remote read endpoint. RequiredMatchers model.LabelSet `yaml:"required_matchers,omitempty"` } +// SetDirectory joins any relative file paths with dir. +func (c *RemoteReadConfig) SetDirectory(dir string) { + c.HTTPClientConfig.SetDirectory(dir) +} + // UnmarshalYAML implements the yaml.Unmarshaler interface. 
func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultRemoteReadConfig diff --git a/config/config_default_test.go b/config/config_default_test.go index 932643b882..e0f09aa11b 100644 --- a/config/config_default_test.go +++ b/config/config_default_test.go @@ -24,5 +24,4 @@ var ruleFilesExpectedConf = &Config{ "testdata/rules/second.rules", "/absolute/third.rules", }, - original: "", } diff --git a/config/config_test.go b/config/config_test.go index e485653b46..85234dd273 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -23,18 +23,20 @@ import ( "testing" "time" - config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/config" "github.com/prometheus/common/model" "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/azure" - sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/digitalocean" "github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/dockerswarm" "github.com/prometheus/prometheus/discovery/ec2" + "github.com/prometheus/prometheus/discovery/eureka" "github.com/prometheus/prometheus/discovery/file" + "github.com/prometheus/prometheus/discovery/hetzner" "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/discovery/marathon" "github.com/prometheus/prometheus/discovery/openstack" @@ -46,12 +48,12 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) -func mustParseURL(u string) *config_util.URL { +func mustParseURL(u string) *config.URL { parsed, err := url.Parse(u) if err != nil { panic(err) } - return &config_util.URL{URL: parsed} + return &config.URL{URL: parsed} } var expectedConf = &Config{ @@ -93,8 +95,8 @@ var expectedConf = &Config{ RemoteTimeout: model.Duration(30 * time.Second), QueueConfig: DefaultQueueConfig, Name: "rw_tls", 
- HTTPClientConfig: config_util.HTTPClientConfig{ - TLSConfig: config_util.TLSConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + TLSConfig: config.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, @@ -115,8 +117,8 @@ var expectedConf = &Config{ ReadRecent: false, Name: "read_special", RequiredMatchers: model.LabelSet{"job": "special"}, - HTTPClientConfig: config_util.HTTPClientConfig{ - TLSConfig: config_util.TLSConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + TLSConfig: config.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, @@ -136,12 +138,20 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - HTTPClientConfig: config_util.HTTPClientConfig{ + HTTPClientConfig: config.HTTPClientConfig{ BearerTokenFile: filepath.FromSlash("testdata/valid_token_file"), }, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - StaticConfigs: []*targetgroup.Group{ + ServiceDiscoveryConfigs: discovery.Configs{ + &file.SDConfig{ + Files: []string{"testdata/foo/*.slow.json", "testdata/foo/*.slow.yml", "testdata/single/file.yml"}, + RefreshInterval: model.Duration(10 * time.Minute), + }, + &file.SDConfig{ + Files: []string{"testdata/bar/*.yaml"}, + RefreshInterval: model.Duration(5 * time.Minute), + }, + discovery.StaticConfig{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, @@ -154,17 +164,6 @@ var expectedConf = &Config{ Source: "0", }, }, - - FileSDConfigs: []*file.SDConfig{ - { - Files: []string{"testdata/foo/*.slow.json", "testdata/foo/*.slow.yml", "testdata/single/file.yml"}, - RefreshInterval: model.Duration(10 * time.Minute), - }, - { - Files: []string{"testdata/bar/*.yaml"}, - RefreshInterval: model.Duration(5 * time.Minute), - }, - }, }, RelabelConfigs: []*relabel.Config{ @@ -206,8 +205,8 @@ var expectedConf = &Config{ 
ScrapeTimeout: model.Duration(5 * time.Second), SampleLimit: 1000, - HTTPClientConfig: config_util.HTTPClientConfig{ - BasicAuth: &config_util.BasicAuth{ + HTTPClientConfig: config.HTTPClientConfig{ + BasicAuth: &config.BasicAuth{ Username: "admin_name", Password: "multiline\nmysecret\ntest", }, @@ -215,23 +214,21 @@ var expectedConf = &Config{ MetricsPath: "/my_path", Scheme: "https", - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - DNSSDConfigs: []*dns.SDConfig{ - { - Names: []string{ - "first.dns.address.domain.com", - "second.dns.address.domain.com", - }, - RefreshInterval: model.Duration(15 * time.Second), - Type: "SRV", + ServiceDiscoveryConfigs: discovery.Configs{ + &dns.SDConfig{ + Names: []string{ + "first.dns.address.domain.com", + "second.dns.address.domain.com", }, - { - Names: []string{ - "first.dns.address.domain.com", - }, - RefreshInterval: model.Duration(30 * time.Second), - Type: "SRV", + RefreshInterval: model.Duration(15 * time.Second), + Type: "SRV", + }, + &dns.SDConfig{ + Names: []string{ + "first.dns.address.domain.com", }, + RefreshInterval: model.Duration(30 * time.Second), + Type: "SRV", }, }, @@ -298,24 +295,22 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - ConsulSDConfigs: []*consul.SDConfig{ - { - Server: "localhost:1234", - Token: "mysecret", - Services: []string{"nginx", "cache", "mysql"}, - ServiceTags: []string{"canary", "v1"}, - NodeMeta: map[string]string{"rack": "123"}, - TagSeparator: consul.DefaultSDConfig.TagSeparator, - Scheme: "https", - RefreshInterval: consul.DefaultSDConfig.RefreshInterval, - AllowStale: true, - TLSConfig: config_util.TLSConfig{ - CertFile: filepath.FromSlash("testdata/valid_cert_file"), - KeyFile: filepath.FromSlash("testdata/valid_key_file"), - CAFile: filepath.FromSlash("testdata/valid_ca_file"), - InsecureSkipVerify: false, - }, + ServiceDiscoveryConfigs: 
discovery.Configs{ + &consul.SDConfig{ + Server: "localhost:1234", + Token: "mysecret", + Services: []string{"nginx", "cache", "mysql"}, + ServiceTags: []string{"canary", "v1"}, + NodeMeta: map[string]string{"rack": "123"}, + TagSeparator: consul.DefaultSDConfig.TagSeparator, + Scheme: "https", + RefreshInterval: consul.DefaultSDConfig.RefreshInterval, + AllowStale: true, + TLSConfig: config.TLSConfig{ + CertFile: filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: filepath.FromSlash("testdata/valid_key_file"), + CAFile: filepath.FromSlash("testdata/valid_ca_file"), + InsecureSkipVerify: false, }, }, }, @@ -341,8 +336,8 @@ var expectedConf = &Config{ MetricsPath: "/metrics", Scheme: "http", - HTTPClientConfig: config_util.HTTPClientConfig{ - TLSConfig: config_util.TLSConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + TLSConfig: config.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, @@ -360,23 +355,21 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - KubernetesSDConfigs: []*kubernetes.SDConfig{ - { - APIServer: kubernetesSDHostURL(), - Role: kubernetes.RoleEndpoint, - HTTPClientConfig: config_util.HTTPClientConfig{ - BasicAuth: &config_util.BasicAuth{ - Username: "myusername", - Password: "mysecret", - }, - TLSConfig: config_util.TLSConfig{ - CertFile: filepath.FromSlash("testdata/valid_cert_file"), - KeyFile: filepath.FromSlash("testdata/valid_key_file"), - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &kubernetes.SDConfig{ + APIServer: kubernetesSDHostURL(), + Role: kubernetes.RoleEndpoint, + HTTPClientConfig: config.HTTPClientConfig{ + BasicAuth: &config.BasicAuth{ + Username: "myusername", + Password: "mysecret", + }, + TLSConfig: config.TLSConfig{ + CertFile: filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: 
filepath.FromSlash("testdata/valid_key_file"), }, - NamespaceDiscovery: kubernetes.NamespaceDiscovery{}, }, + NamespaceDiscovery: kubernetes.NamespaceDiscovery{}, }, }, }, @@ -389,22 +382,20 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - HTTPClientConfig: config_util.HTTPClientConfig{ - BasicAuth: &config_util.BasicAuth{ + HTTPClientConfig: config.HTTPClientConfig{ + BasicAuth: &config.BasicAuth{ Username: "myusername", PasswordFile: filepath.FromSlash("testdata/valid_password_file"), }, }, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - KubernetesSDConfigs: []*kubernetes.SDConfig{ - { - APIServer: kubernetesSDHostURL(), - Role: kubernetes.RoleEndpoint, - NamespaceDiscovery: kubernetes.NamespaceDiscovery{ - Names: []string{ - "default", - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &kubernetes.SDConfig{ + APIServer: kubernetesSDHostURL(), + Role: kubernetes.RoleEndpoint, + NamespaceDiscovery: kubernetes.NamespaceDiscovery{ + Names: []string{ + "default", }, }, }, @@ -420,19 +411,17 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - MarathonSDConfigs: []*marathon.SDConfig{ - { - Servers: []string{ - "https://marathon.example.com:443", - }, - RefreshInterval: model.Duration(30 * time.Second), - AuthToken: config_util.Secret("mysecret"), - HTTPClientConfig: config_util.HTTPClientConfig{ - TLSConfig: config_util.TLSConfig{ - CertFile: filepath.FromSlash("testdata/valid_cert_file"), - KeyFile: filepath.FromSlash("testdata/valid_key_file"), - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &marathon.SDConfig{ + Servers: []string{ + "https://marathon.example.com:443", + }, + RefreshInterval: model.Duration(30 * time.Second), + AuthToken: "mysecret", + HTTPClientConfig: config.HTTPClientConfig{ + TLSConfig: config.TLSConfig{ + CertFile: 
filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, }, }, @@ -448,24 +437,22 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - EC2SDConfigs: []*ec2.SDConfig{ - { - Region: "us-east-1", - AccessKey: "access", - SecretKey: "mysecret", - Profile: "profile", - RefreshInterval: model.Duration(60 * time.Second), - Port: 80, - Filters: []*ec2.Filter{ - { - Name: "tag:environment", - Values: []string{"prod"}, - }, - { - Name: "tag:service", - Values: []string{"web", "db"}, - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &ec2.SDConfig{ + Region: "us-east-1", + AccessKey: "access", + SecretKey: "mysecret", + Profile: "profile", + RefreshInterval: model.Duration(60 * time.Second), + Port: 80, + Filters: []*ec2.Filter{ + { + Name: "tag:environment", + Values: []string{"prod"}, + }, + { + Name: "tag:service", + Values: []string{"web", "db"}, }, }, }, @@ -481,18 +468,16 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - AzureSDConfigs: []*azure.SDConfig{ - { - Environment: "AzurePublicCloud", - SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", - TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", - ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", - ClientSecret: "mysecret", - AuthenticationMethod: "OAuth", - RefreshInterval: model.Duration(5 * time.Minute), - Port: 9100, - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &azure.SDConfig{ + Environment: "AzurePublicCloud", + SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", + TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", + ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", + ClientSecret: "mysecret", + AuthenticationMethod: "OAuth", + RefreshInterval: model.Duration(5 * time.Minute), + Port: 9100, }, }, }, @@ 
-506,13 +491,11 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - NerveSDConfigs: []*zookeeper.NerveSDConfig{ - { - Servers: []string{"localhost"}, - Paths: []string{"/monitoring"}, - Timeout: model.Duration(10 * time.Second), - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &zookeeper.NerveSDConfig{ + Servers: []string{"localhost"}, + Paths: []string{"/monitoring"}, + Timeout: model.Duration(10 * time.Second), }, }, }, @@ -526,8 +509,8 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - StaticConfigs: []*targetgroup.Group{ + ServiceDiscoveryConfigs: discovery.Configs{ + discovery.StaticConfig{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, @@ -547,8 +530,8 @@ var expectedConf = &Config{ MetricsPath: "/federate", Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - StaticConfigs: []*targetgroup.Group{ + ServiceDiscoveryConfigs: discovery.Configs{ + discovery.StaticConfig{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, @@ -568,8 +551,8 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - StaticConfigs: []*targetgroup.Group{ + ServiceDiscoveryConfigs: discovery.Configs{ + discovery.StaticConfig{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, @@ -589,20 +572,18 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - TritonSDConfigs: []*triton.SDConfig{ - { - Account: "testAccount", - Role: "container", - DNSSuffix: "triton.example.com", - Endpoint: "triton.example.com", - 
Port: 9163, - RefreshInterval: model.Duration(60 * time.Second), - Version: 1, - TLSConfig: config_util.TLSConfig{ - CertFile: "testdata/valid_cert_file", - KeyFile: "testdata/valid_key_file", - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &triton.SDConfig{ + Account: "testAccount", + Role: "container", + DNSSuffix: "triton.example.com", + Endpoint: "triton.example.com", + Port: 9163, + RefreshInterval: model.Duration(60 * time.Second), + Version: 1, + TLSConfig: config.TLSConfig{ + CertFile: "testdata/valid_cert_file", + KeyFile: "testdata/valid_key_file", }, }, }, @@ -617,15 +598,13 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - DigitalOceanSDConfigs: []*digitalocean.SDConfig{ - { - HTTPClientConfig: config_util.HTTPClientConfig{ - BearerToken: "abcdef", - }, - Port: 80, - RefreshInterval: model.Duration(60 * time.Second), + ServiceDiscoveryConfigs: discovery.Configs{ + &digitalocean.SDConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + BearerToken: "abcdef", }, + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), }, }, }, @@ -639,14 +618,12 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - DockerSwarmSDConfigs: []*dockerswarm.SDConfig{ - { - Host: "http://127.0.0.1:2375", - Role: "nodes", - Port: 80, - RefreshInterval: model.Duration(60 * time.Second), - }, + ServiceDiscoveryConfigs: discovery.Configs{ + &dockerswarm.SDConfig{ + Host: "http://127.0.0.1:2375", + Role: "nodes", + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), }, }, }, @@ -660,21 +637,61 @@ var expectedConf = &Config{ MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - OpenstackSDConfigs: []*openstack.SDConfig{ - { - Role: 
"instance", - Region: "RegionOne", - Port: 80, - Availability: "public", - RefreshInterval: model.Duration(60 * time.Second), - TLSConfig: config_util.TLSConfig{ - CAFile: "testdata/valid_ca_file", - CertFile: "testdata/valid_cert_file", - KeyFile: "testdata/valid_key_file", - }, + ServiceDiscoveryConfigs: discovery.Configs{&openstack.SDConfig{ + Role: "instance", + Region: "RegionOne", + Port: 80, + Availability: "public", + RefreshInterval: model.Duration(60 * time.Second), + TLSConfig: config.TLSConfig{ + CAFile: "testdata/valid_ca_file", + CertFile: "testdata/valid_cert_file", + KeyFile: "testdata/valid_key_file", + }}, + }, + }, + { + JobName: "hetzner", + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + + MetricsPath: DefaultScrapeConfig.MetricsPath, + Scheme: DefaultScrapeConfig.Scheme, + + ServiceDiscoveryConfigs: discovery.Configs{ + &hetzner.SDConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + BearerToken: "abcdef", }, + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), + Role: "hcloud", }, + &hetzner.SDConfig{ + HTTPClientConfig: config.HTTPClientConfig{ + BasicAuth: &config.BasicAuth{Username: "abcdef", Password: "abcdef"}, + }, + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), + Role: "robot", + }, + }, + }, + { + JobName: "service-eureka", + + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + + MetricsPath: DefaultScrapeConfig.MetricsPath, + Scheme: DefaultScrapeConfig.Scheme, + + ServiceDiscoveryConfigs: discovery.Configs{&eureka.SDConfig{ + Server: "http://eureka.example.com:8761/eureka", + RefreshInterval: model.Duration(30 * time.Second), + }, }, }, }, @@ -684,8 +701,8 @@ var expectedConf = &Config{ Scheme: "https", Timeout: model.Duration(10 * time.Second), APIVersion: AlertmanagerAPIVersionV1, - ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ - 
StaticConfigs: []*targetgroup.Group{ + ServiceDiscoveryConfigs: discovery.Configs{ + discovery.StaticConfig{ { Targets: []model.LabelSet{ {model.AddressLabel: "1.2.3.4:9093"}, @@ -699,7 +716,19 @@ var expectedConf = &Config{ }, }, }, - original: "", +} + +func TestYAMLRoundtrip(t *testing.T) { + want, err := LoadFile("testdata/roundtrip.good.yml") + testutil.Ok(t, err) + + out, err := yaml.Marshal(want) + + testutil.Ok(t, err) + got := &Config{} + testutil.Ok(t, yaml.UnmarshalStrict(out, got)) + + testutil.Equals(t, want, got) } func TestLoadConfig(t *testing.T) { @@ -710,8 +739,6 @@ func TestLoadConfig(t *testing.T) { c, err := LoadFile("testdata/conf.good.yml") testutil.Ok(t, err) - - expectedConf.original = c.original testutil.Equals(t, expectedConf, c) } @@ -736,7 +763,7 @@ func TestElideSecrets(t *testing.T) { yamlConfig := string(config) matches := secretRe.FindAllStringIndex(yamlConfig, -1) - testutil.Assert(t, len(matches) == 8, "wrong number of secret matches found") + testutil.Assert(t, len(matches) == 10, "wrong number of secret matches found") testutil.Assert(t, !strings.Contains(yamlConfig, "mysecret"), "yaml marshal reveals authentication credentials.") } @@ -745,8 +772,6 @@ func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path c, err := LoadFile(ruleFilesConfigFile) testutil.Ok(t, err) - - ruleFilesExpectedConf.original = c.original testutil.Equals(t, ruleFilesExpectedConf, c) } @@ -834,7 +859,7 @@ var expectedErrors = []struct { errMsg: "invalid rule file path", }, { filename: "unknown_attr.bad.yml", - errMsg: "field consult_sd_configs not found in type config.plain", + errMsg: "field consult_sd_configs not found in type", }, { filename: "bearertoken.bad.yml", errMsg: "at most one of bearer_token & bearer_token_file must be configured", @@ -984,6 +1009,18 @@ var expectedErrors = []struct { filename: "empty_static_config.bad.yml", errMsg: "empty or null section in static_configs", 
}, + { + filename: "hetzner_role.bad.yml", + errMsg: "unknown role", + }, + { + filename: "eureka_no_server.bad.yml", + errMsg: "empty or null eureka server", + }, + { + filename: "eureka_invalid_server.bad.yml", + errMsg: "invalid eureka server URL", + }, } func TestBadConfigs(t *testing.T) { @@ -1022,11 +1059,10 @@ func TestEmptyGlobalBlock(t *testing.T) { c, err := Load("global:\n") testutil.Ok(t, err) exp := DefaultConfig - exp.original = "global:\n" testutil.Equals(t, exp, *c) } -func kubernetesSDHostURL() config_util.URL { +func kubernetesSDHostURL() config.URL { tURL, _ := url.Parse("https://localhost:1234") - return config_util.URL{URL: tURL} + return config.URL{URL: tURL} } diff --git a/config/config_windows_test.go b/config/config_windows_test.go index ac97d31052..7fd1d46f6d 100644 --- a/config/config_windows_test.go +++ b/config/config_windows_test.go @@ -22,5 +22,4 @@ var ruleFilesExpectedConf = &Config{ "testdata\\rules\\second.rules", "c:\\absolute\\third.rules", }, - original: "", } diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 7d0b441ef1..b45541fbd0 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -279,6 +279,19 @@ scrape_configs: cert_file: valid_cert_file key_file: valid_key_file +- job_name: hetzner + hetzner_sd_configs: + - role: hcloud + bearer_token: abcdef + - role: robot + basic_auth: + username: abcdef + password: abcdef + +- job_name: service-eureka + eureka_sd_configs: + - server: 'http://eureka.example.com:8761/eureka' + alerting: alertmanagers: - scheme: https diff --git a/config/testdata/eureka_invalid_server.bad.yml b/config/testdata/eureka_invalid_server.bad.yml new file mode 100644 index 0000000000..0c8ae428ac --- /dev/null +++ b/config/testdata/eureka_invalid_server.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + +- job_name: eureka + eureka_sd_configs: + - server: eureka.com diff --git a/config/testdata/eureka_no_server.bad.yml 
b/config/testdata/eureka_no_server.bad.yml new file mode 100644 index 0000000000..35c578a6ce --- /dev/null +++ b/config/testdata/eureka_no_server.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + +- job_name: eureka + eureka_sd_configs: + - server: diff --git a/config/testdata/hetzner_role.bad.yml b/config/testdata/hetzner_role.bad.yml new file mode 100644 index 0000000000..0a5cc8c48e --- /dev/null +++ b/config/testdata/hetzner_role.bad.yml @@ -0,0 +1,4 @@ +scrape_configs: +- hetzner_sd_configs: + - role: invalid + diff --git a/config/testdata/roundtrip.good.yml b/config/testdata/roundtrip.good.yml new file mode 100644 index 0000000000..4aa3c432ef --- /dev/null +++ b/config/testdata/roundtrip.good.yml @@ -0,0 +1,143 @@ +alerting: + alertmanagers: + - scheme: https + + file_sd_configs: + - files: + - foo/*.slow.json + - foo/*.slow.yml + refresh_interval: 10m + - files: + - bar/*.yaml + + static_configs: + - targets: + - 1.2.3.4:9093 + - 1.2.3.5:9093 + - 1.2.3.6:9093 + +scrape_configs: + +- job_name: foo + static_configs: + - targets: + - localhost:9090 + - localhost:9191 + labels: + my: label + your: label + +- job_name: bar + + azure_sd_configs: + - environment: AzurePublicCloud + authentication_method: OAuth + subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 + tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 + client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C + client_secret: + port: 9100 + + consul_sd_configs: + - server: localhost:1234 + token: + services: [nginx, cache, mysql] + tags: [canary, v1] + node_meta: + rack: "123" + allow_stale: true + scheme: https + tls_config: + ca_file: valid_ca_file + cert_file: valid_cert_file + key_file: valid_key_file + + digitalocean_sd_configs: + - bearer_token: + + dockerswarm_sd_configs: + - host: http://127.0.0.1:2375 + role: nodes + + dns_sd_configs: + - refresh_interval: 15s + names: + - first.dns.address.domain.com + - second.dns.address.domain.com + - names: + - first.dns.address.domain.com + + ec2_sd_configs: + - region: 
us-east-1 + access_key: access + secret_key: + profile: profile + filters: + - name: tag:environment + values: + - prod + - name: tag:service + values: + - web + - db + + file_sd_configs: + - files: + - single/file.yml + + kubernetes_sd_configs: + - role: endpoints + api_server: https://localhost:1234 + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file + basic_auth: + username: username + password: + - role: endpoints + api_server: https://localhost:1234 + namespaces: + names: + - default + basic_auth: + username: username + password_file: valid_password_file + + marathon_sd_configs: + - servers: + - https://marathon.example.com:443 + auth_token: + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file + + nerve_sd_configs: + - servers: + - localhost + paths: + - /monitoring + + openstack_sd_configs: + - role: instance + region: RegionOne + port: 80 + refresh_interval: 1m + tls_config: + ca_file: valid_ca_file + cert_file: valid_cert_file + key_file: valid_key_file + + static_configs: + - targets: + - localhost:9093 + + triton_sd_configs: + - account: testAccount + dns_suffix: triton.example.com + endpoint: triton.example.com + port: 9163 + refresh_interval: 1m + version: 1 + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file diff --git a/console_libraries/prom.lib b/console_libraries/prom.lib index 1edf475e1a..3efbf36472 100644 --- a/console_libraries/prom.lib +++ b/console_libraries/prom.lib @@ -2,15 +2,15 @@ {{/* Load Prometheus console library JS/CSS. Should go in */}} {{ define "prom_console_head" }} - + - + - + - + - + diff --git a/web/ui/templates/alerts.html b/web/ui/templates/alerts.html index af8ab5c52f..c531fc8254 100644 --- a/web/ui/templates/alerts.html +++ b/web/ui/templates/alerts.html @@ -8,13 +8,13 @@

Alerts


diff --git a/web/ui/templates/rules.html b/web/ui/templates/rules.html index 327ca0fc15..5c1c15db2b 100644 --- a/web/ui/templates/rules.html +++ b/web/ui/templates/rules.html @@ -10,8 +10,8 @@

{{.Name}}

-

{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}

-

{{humanizeDuration .GetEvaluationDuration.Seconds}}

+

{{if .GetLastEvaluation.IsZero}}Never{{else}}{{since .GetLastEvaluation}} ago{{end}}

+

{{humanizeDuration .GetEvaluationTime.Seconds}}

diff --git a/web/ui/templates/targets.html b/web/ui/templates/targets.html index 813f858f74..07e1dcd40b 100644 --- a/web/ui/templates/targets.html +++ b/web/ui/templates/targets.html @@ -7,15 +7,18 @@ {{define "content"}}

Targets

-
- - -
-
+
+
+ + +
+
+ +
{{- range $job, $pool := .TargetPools}} {{- $healthy := numHealthy $pool}} diff --git a/web/web.go b/web/web.go index fbd4043692..51ab23ad35 100644 --- a/web/web.go +++ b/web/web.go @@ -34,7 +34,6 @@ import ( "sort" "strings" "sync" - "sync/atomic" template_text "text/template" "time" @@ -51,11 +50,8 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/route" "github.com/prometheus/common/server" - "github.com/prometheus/prometheus/tsdb" - "github.com/prometheus/prometheus/tsdb/index" - "github.com/soheilhy/cmux" + "go.uber.org/atomic" "golang.org/x/net/netutil" - "google.golang.org/grpc" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/notifier" @@ -64,9 +60,10 @@ import ( "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/template" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/util/httputil" api_v1 "github.com/prometheus/prometheus/web/api/v1" - api_v2 "github.com/prometheus/prometheus/web/api/v2" "github.com/prometheus/prometheus/web/ui" ) @@ -202,7 +199,7 @@ type Handler struct { mtx sync.RWMutex now func() model.Time - ready uint32 // ready is uint32 rather than boolean to be able to use atomic functions. + ready atomic.Uint32 // ready is uint32 rather than boolean to be able to use atomic functions. 
} // ApplyConfig updates the config field of the Handler struct @@ -293,9 +290,8 @@ func New(logger log.Logger, o *Options) *Handler { notifier: o.Notifier, now: model.Now, - - ready: 0, } + h.ready.Store(0) factoryTr := func(_ context.Context) api_v1.TargetRetriever { return h.scrapeManager } factoryAr := func(_ context.Context) api_v1.AlertmanagerRetriever { return h.notifier } @@ -325,6 +321,7 @@ func New(logger log.Logger, o *Options) *Handler { h.options.CORSOrigin, h.runtimeInfo, h.versionInfo, + o.Gatherer, ) if o.RoutePrefix != "/" { @@ -484,13 +481,12 @@ func serveDebug(w http.ResponseWriter, req *http.Request) { // Ready sets Handler to be ready. func (h *Handler) Ready() { - atomic.StoreUint32(&h.ready, 1) + h.ready.Store(1) } // Verifies whether the server is ready or not. func (h *Handler) isReady() bool { - ready := atomic.LoadUint32(&h.ready) - return ready > 0 + return h.ready.Load() > 0 } // Checks if server is ready, calls f if it is, returns 503 if it is not. @@ -505,11 +501,6 @@ func (h *Handler) testReady(f http.HandlerFunc) http.HandlerFunc { } } -// Checks if server is ready, calls f if it is, returns 503 if it is not. -func (h *Handler) testReadyHandler(f http.Handler) http.HandlerFunc { - return h.testReady(f.ServeHTTP) -} - // Quit returns the receive-only quit channel. func (h *Handler) Quit() <-chan struct{} { return h.quitCh @@ -535,27 +526,6 @@ func (h *Handler) Run(ctx context.Context) error { conntrack.TrackWithName("http"), conntrack.TrackWithTracing()) - var ( - m = cmux.New(listener) - // See https://github.com/grpc/grpc-go/issues/2636 for why we need to use MatchWithWriters(). 
- grpcl = m.MatchWithWriters(cmux.HTTP2MatchHeaderFieldSendSettings("content-type", "application/grpc")) - httpl = m.Match(cmux.HTTP1Fast()) - grpcSrv = grpc.NewServer() - ) - av2 := api_v2.New( - h.options.LocalStorage, - h.options.TSDBDir, - h.options.EnableAdminAPI, - ) - av2.RegisterGRPC(grpcSrv) - - hh, err := av2.HTTPHandler(ctx, h.options.ListenAddress) - if err != nil { - return err - } - - hhFunc := h.testReadyHandler(hh) - operationName := nethttp.OperationNameFunc(func(r *http.Request) string { return fmt.Sprintf("%s %s", r.Method, r.URL.Path) }) @@ -574,13 +544,6 @@ func (h *Handler) Run(ctx context.Context) error { mux.Handle(apiPath+"/v1/", http.StripPrefix(apiPath+"/v1", av1)) - mux.Handle(apiPath+"/", http.StripPrefix(apiPath, - http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - httputil.SetCORS(w, h.options.CORSOrigin, r) - hhFunc(w, r) - }), - )) - errlog := stdlog.New(log.NewStdlibAdapter(level.Error(h.logger)), "", 0) httpSrv := &http.Server{ @@ -591,13 +554,7 @@ func (h *Handler) Run(ctx context.Context) error { errCh := make(chan error) go func() { - errCh <- httpSrv.Serve(httpl) - }() - go func() { - errCh <- grpcSrv.Serve(grpcl) - }() - go func() { - errCh <- m.Serve() + errCh <- httpSrv.Serve(listener) }() select { @@ -605,7 +562,6 @@ func (h *Handler) Run(ctx context.Context) error { return e case <-ctx.Done(): httpSrv.Shutdown(ctx) - grpcSrv.GracefulStop() return nil } } @@ -841,10 +797,6 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) { } for _, mF := range metrics { switch *mF.Name { - case "prometheus_tsdb_head_chunks": - status.ChunkCount = int64(toFloat64(mF)) - case "prometheus_tsdb_head_series": - status.TimeSeriesCount = int64(toFloat64(mF)) case "prometheus_tsdb_wal_corruptions_total": status.CorruptionCount = int64(toFloat64(mF)) case "prometheus_config_last_reload_successful": diff --git a/web/web_test.go b/web/web_test.go index 96c4c5c4f5..0b4cd9edb6 100644 --- a/web/web_test.go +++ 
b/web/web_test.go @@ -15,12 +15,16 @@ package web import ( "context" + "encoding/json" "fmt" + "io" "io/ioutil" + "net" "net/http" "net/http/httptest" "net/url" "os" + "path/filepath" "strconv" "strings" "testing" @@ -103,7 +107,7 @@ func TestReadyAndHealthy(t *testing.T) { dbDir, err := ioutil.TempDir("", "tsdb-ready") testutil.Ok(t, err) - defer testutil.Ok(t, os.RemoveAll(dbDir)) + defer func() { testutil.Ok(t, os.RemoveAll(dbDir)) }() db, err := tsdb.Open(dbDir, nil, nil, nil) testutil.Ok(t, err) @@ -138,164 +142,89 @@ func TestReadyAndHealthy(t *testing.T) { webHandler.config = &config.Config{} webHandler.notifier = &notifier.Manager{} + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() go func() { - err := webHandler.Run(context.Background()) + err := webHandler.Run(ctx) if err != nil { panic(fmt.Sprintf("Can't start web handler:%s", err)) } }() - // TODO(bwplotka): Those tests create tons of new connection and memory that is never cleaned. - // Close and exhaust all response bodies. - // Give some time for the web goroutine to run since we need the server // to be up before starting tests. 
time.Sleep(5 * time.Second) resp, err := http.Get("http://localhost:9090/-/healthy") - testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) - resp, err = http.Get("http://localhost:9090/-/ready") + for _, u := range []string{ + "http://localhost:9090/-/ready", + "http://localhost:9090/version", + "http://localhost:9090/graph", + "http://localhost:9090/flags", + "http://localhost:9090/rules", + "http://localhost:9090/service-discovery", + "http://localhost:9090/targets", + "http://localhost:9090/status", + "http://localhost:9090/config", + } { + resp, err = http.Get(u) + testutil.Ok(t, err) + testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) + } + resp, err = http.Post("http://localhost:9090/api/v1/admin/tsdb/snapshot", "", strings.NewReader("")) testutil.Ok(t, err) testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) - resp, err = http.Get("http://localhost:9090/version") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/graph") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Post("http://localhost:9090/api/v2/admin/tsdb/snapshot", "", strings.NewReader("")) - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Post("http://localhost:9090/api/v2/admin/tsdb/delete_series", "", strings.NewReader("{}")) - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/graph") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/alerts") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = 
http.Get("http://localhost:9090/flags") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/rules") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/service-discovery") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/targets") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/config") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/status") - + resp, err = http.Post("http://localhost:9090/api/v1/admin/tsdb/delete_series", "", strings.NewReader("{}")) testutil.Ok(t, err) testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) // Set to ready. 
webHandler.Ready() - resp, err = http.Get("http://localhost:9090/-/healthy") + for _, u := range []string{ + "http://localhost:9090/-/healthy", + "http://localhost:9090/-/ready", + "http://localhost:9090/version", + "http://localhost:9090/graph", + "http://localhost:9090/flags", + "http://localhost:9090/rules", + "http://localhost:9090/service-discovery", + "http://localhost:9090/targets", + "http://localhost:9090/status", + "http://localhost:9090/config", + } { + resp, err = http.Get(u) + testutil.Ok(t, err) + testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) + } + resp, err = http.Post("http://localhost:9090/api/v1/admin/tsdb/snapshot", "", strings.NewReader("")) testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupSnapshot(t, dbDir, resp) + cleanupTestResponse(t, resp) - resp, err = http.Get("http://localhost:9090/-/ready") - + resp, err = http.Post("http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=up", "", nil) testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/version") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/graph") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Post("http://localhost:9090/api/v2/admin/tsdb/snapshot", "", strings.NewReader("")) - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Post("http://localhost:9090/api/v2/admin/tsdb/delete_series", "", strings.NewReader("{}")) - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/alerts") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/flags") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = 
http.Get("http://localhost:9090/rules") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/service-discovery") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/targets") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/config") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) - - resp, err = http.Get("http://localhost:9090/status") - - testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) + testutil.Equals(t, http.StatusNoContent, resp.StatusCode) + cleanupTestResponse(t, resp) } func TestRoutePrefix(t *testing.T) { t.Parallel() dbDir, err := ioutil.TempDir("", "tsdb-ready") testutil.Ok(t, err) - defer testutil.Ok(t, os.RemoveAll(dbDir)) + defer func() { testutil.Ok(t, os.RemoveAll(dbDir)) }() db, err := tsdb.Open(dbDir, nil, nil, nil) testutil.Ok(t, err) @@ -323,8 +252,10 @@ func TestRoutePrefix(t *testing.T) { opts.Flags = map[string]string{} webHandler := New(nil, opts) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() go func() { - err := webHandler.Run(context.Background()) + err := webHandler.Run(ctx) if err != nil { panic(fmt.Sprintf("Can't start web handler:%s", err)) } @@ -335,57 +266,58 @@ func TestRoutePrefix(t *testing.T) { time.Sleep(5 * time.Second) resp, err := http.Get("http://localhost:9091" + opts.RoutePrefix + "/-/healthy") - testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) resp, err = http.Get("http://localhost:9091" + opts.RoutePrefix + "/-/ready") - testutil.Ok(t, err) testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) resp, err = http.Get("http://localhost:9091" + opts.RoutePrefix + "/version") - testutil.Ok(t, err) testutil.Equals(t, 
http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) - resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v2/admin/tsdb/snapshot", "", strings.NewReader("")) - + resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v1/admin/tsdb/snapshot", "", strings.NewReader("")) testutil.Ok(t, err) testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) - resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v2/admin/tsdb/delete_series", "", strings.NewReader("{}")) - + resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v1/admin/tsdb/delete_series", "", strings.NewReader("{}")) testutil.Ok(t, err) testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode) + cleanupTestResponse(t, resp) // Set to ready. webHandler.Ready() resp, err = http.Get("http://localhost:9091" + opts.RoutePrefix + "/-/healthy") - testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) resp, err = http.Get("http://localhost:9091" + opts.RoutePrefix + "/-/ready") - testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) resp, err = http.Get("http://localhost:9091" + opts.RoutePrefix + "/version") - testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) - resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v2/admin/tsdb/snapshot", "", strings.NewReader("")) - + resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v1/admin/tsdb/snapshot", "", strings.NewReader("")) testutil.Ok(t, err) testutil.Equals(t, http.StatusOK, resp.StatusCode) + cleanupSnapshot(t, dbDir, resp) + cleanupTestResponse(t, resp) - resp, err = http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v2/admin/tsdb/delete_series", "", strings.NewReader("{}")) - + resp, err = 
http.Post("http://localhost:9091"+opts.RoutePrefix+"/api/v1/admin/tsdb/delete_series?match[]=up", "", nil) testutil.Ok(t, err) - testutil.Equals(t, http.StatusOK, resp.StatusCode) + testutil.Equals(t, http.StatusNoContent, resp.StatusCode) + cleanupTestResponse(t, resp) } func TestDebugHandler(t *testing.T) { @@ -459,3 +391,93 @@ func TestHTTPMetrics(t *testing.T) { testutil.Equals(t, 2, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusOK))))) testutil.Equals(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusServiceUnavailable))))) } + +func TestShutdownWithStaleConnection(t *testing.T) { + dbDir, err := ioutil.TempDir("", "tsdb-ready") + testutil.Ok(t, err) + defer func() { testutil.Ok(t, os.RemoveAll(dbDir)) }() + + db, err := tsdb.Open(dbDir, nil, nil, nil) + testutil.Ok(t, err) + + timeout := 10 * time.Second + + opts := &Options{ + ListenAddress: ":9090", + ReadTimeout: timeout, + MaxConnections: 512, + Context: nil, + Storage: nil, + LocalStorage: &dbAdapter{db}, + TSDBDir: dbDir, + QueryEngine: nil, + ScrapeManager: &scrape.Manager{}, + RuleManager: &rules.Manager{}, + Notifier: nil, + RoutePrefix: "/", + ExternalURL: &url.URL{ + Scheme: "http", + Host: "localhost:9090", + Path: "/", + }, + Version: &PrometheusVersion{}, + Gatherer: prometheus.DefaultGatherer, + } + + opts.Flags = map[string]string{} + + webHandler := New(nil, opts) + + webHandler.config = &config.Config{} + webHandler.notifier = &notifier.Manager{} + + closed := make(chan struct{}) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + err := webHandler.Run(ctx) + if err != nil { + panic(fmt.Sprintf("Can't start web handler:%s", err)) + } + close(closed) + }() + + // Give some time for the web goroutine to run since we need the server + // to be up before starting tests. + time.Sleep(5 * time.Second) + + // Open a socket, and don't use it. 
This connection should then be closed + // after the ReadTimeout. + c, err := net.Dial("tcp", "localhost:9090") + testutil.Ok(t, err) + t.Cleanup(func() { testutil.Ok(t, c.Close()) }) + + // Stop the web handler. + cancel() + + select { + case <-closed: + case <-time.After(timeout + 5*time.Second): + t.Fatalf("Server still running after read timeout.") + } +} + +func cleanupTestResponse(t *testing.T, resp *http.Response) { + _, err := io.Copy(ioutil.Discard, resp.Body) + testutil.Ok(t, err) + testutil.Ok(t, resp.Body.Close()) +} + +func cleanupSnapshot(t *testing.T, dbDir string, resp *http.Response) { + snapshot := &struct { + Data struct { + Name string `json:"name"` + } `json:"data"` + }{} + b, err := ioutil.ReadAll(resp.Body) + testutil.Ok(t, err) + testutil.Ok(t, json.Unmarshal(b, snapshot)) + testutil.Assert(t, snapshot.Data.Name != "", "snapshot directory not returned") + testutil.Ok(t, os.Remove(filepath.Join(dbDir, "snapshots", snapshot.Data.Name))) + testutil.Ok(t, os.Remove(filepath.Join(dbDir, "snapshots"))) +}