From c25f7c600b7bae4fee845e0574eb2b2eed5cf597 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Mon, 16 Oct 2017 14:35:47 +0100 Subject: [PATCH 01/11] Apply external labels to remote read endpoint (#3263) Fixes #3261 --- web/api/v1/api.go | 17 ++++++++ web/api/v1/api_test.go | 88 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 131ed70ea4..b8de72ebc5 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -496,6 +496,23 @@ func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { OldestInclusive: from, NewestInclusive: through, })) + externalLabels := api.config().GlobalConfig.ExternalLabels.Clone() + for _, ts := range resp.Results[i].Timeseries { + globalUsed := map[string]struct{}{} + for _, l := range ts.Labels { + if _, ok := externalLabels[model.LabelName(l.Name)]; ok { + globalUsed[l.Name] = struct{}{} + } + } + for ln, lv := range externalLabels { + if _, ok := globalUsed[string(ln)]; !ok { + ts.Labels = append(ts.Labels, &remote.LabelPair{ + Name: string(ln), + Value: string(lv), + }) + } + } + } } if err := remote.EncodeReadResponse(&resp, w); err != nil { diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 463fb1d40d..6b5e9b033f 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -14,6 +14,7 @@ package v1 import ( + "bytes" "encoding/json" "errors" "fmt" @@ -25,6 +26,8 @@ import ( "testing" "time" + "github.com/golang/protobuf/proto" + "github.com/golang/snappy" "github.com/prometheus/common/model" "github.com/prometheus/common/route" "golang.org/x/net/context" @@ -32,6 +35,8 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/retrieval" + "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/remote" ) type targetRetrieverFunc func() []*retrieval.Target @@ -532,6 +537,89 @@ func TestEndpoints(t *testing.T) { } } +func TestReadEndpoint(t *testing.T) { + suite, err := promql.NewTest(t, ` + load 1m + test_metric1{foo="bar",baz="qux"} 1 + `) + if err != nil { + t.Fatal(err) + } + defer suite.Close() + + if err := suite.Run(); err != nil { + t.Fatal(err) + } + + api := &API{ + Storage: suite.Storage(), + QueryEngine: suite.QueryEngine(), + config: func() config.Config { + return config.Config{ + GlobalConfig: config.GlobalConfig{ + ExternalLabels: model.LabelSet{ + "baz": "a", + "b": "c", + }, + }, + } + }, + } + + // Encode the request. + matcher, err := metric.NewLabelMatcher(metric.Equal, "__name__", "test_metric1") + if err != nil { + t.Fatal(err) + } + query, err := remote.ToQuery(0, 1, metric.LabelMatchers{matcher}) + if err != nil { + t.Fatal(err) + } + req := &remote.ReadRequest{Queries: []*remote.Query{query}} + data, err := proto.Marshal(req) + if err != nil { + t.Fatal(err) + } + compressed := snappy.Encode(nil, data) + request, err := http.NewRequest("POST", "", bytes.NewBuffer(compressed)) + if err != nil { + t.Fatal(err) + } + recorder := httptest.NewRecorder() + api.remoteRead(recorder, request) + + // Decode the response. 
+ compressed, err = ioutil.ReadAll(recorder.Result().Body) + if err != nil { + t.Fatal(err) + } + uncompressed, err := snappy.Decode(nil, compressed) + if err != nil { + t.Fatal(err) + } + + var resp remote.ReadResponse + err = proto.Unmarshal(uncompressed, &resp) + if err != nil { + t.Fatal(err) + } + + if len(resp.Results) != 1 { + t.Fatalf("Expected 1 result, got %d", len(resp.Results)) + } + + result := remote.FromQueryResult(resp.Results[0]) + expected := &model.Matrix{ + &model.SampleStream{ + Metric: model.Metric{"__name__": "test_metric1", "b": "c", "baz": "qux", "foo": "bar"}, + Values: []model.SamplePair{model.SamplePair{Value: 1, Timestamp: 0}}, + }, + } + if !reflect.DeepEqual(&result, expected) { + t.Fatalf("Expected response \n%v\n but got \n%v\n", result, expected) + } +} + func TestRespondSuccess(t *testing.T) { s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { respond(w, "test") From ced935e2d2bcb253f11e233862fde40c1b0f64a1 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 19 Oct 2017 15:05:25 +0100 Subject: [PATCH 02/11] Release 1.8.1 (#3318) --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebd71eade9..483cf9792b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.8.1 / 2017-10-19 + +* [BUGFIX] Apply external labels to remote read endpoint + ## 1.8.0 / 2017-10-06 * [CHANGE] Rule links link to the _Console_ tab rather than the _Graph_ tab to From f6df3b7d5757fb358dd10194caefc0e2ecaa2c96 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 19 Oct 2017 16:45:27 +0100 Subject: [PATCH 03/11] Bump version for 1.8.1 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 27f9cd322b..a8fdfda1c7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.8.0 +1.8.1 From 3a7c51ab70fc7615cd318204d3aa7c078b7c5b20 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Mon, 23 Oct 2017 15:12:22 +0100 Subject: [PATCH 04/11] Remote read endpoint should handle matchers for external labels. (#3325) If the other Prometheus has an external label that matches that of the Prometheus being read from, then we need to remove that matcher from the request as it's not actually stored in the database - it's only added for alerts, federation and on the output of the remote read endpoint. Instead we check for that label being empty, in case there is a time series with a different label value for that external label. --- CHANGELOG.md | 2 +- web/api/v1/api.go | 22 ++++++++++++++++++++-- web/api/v1/api_test.go | 11 ++++++++--- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 483cf9792b..47fd50286c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 1.8.1 / 2017-10-19 -* [BUGFIX] Apply external labels to remote read endpoint +* [BUGFIX] Correctly handle external labels on remote read endpoint ## 1.8.0 / 2017-10-06 diff --git a/web/api/v1/api.go b/web/api/v1/api.go index b8de72ebc5..649c08f338 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -486,7 +486,26 @@ func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } - iters, err := querier.QueryRange(r.Context(), from, through, matchers...) + // Change equality matchers which match external labels + // to a matcher that looks for an empty label, + // as that label should not be present in the storage. 
+ externalLabels := api.config().GlobalConfig.ExternalLabels.Clone() + filteredMatchers := make([]*metric.LabelMatcher, 0, len(matchers)) + for _, m := range matchers { + value := externalLabels[m.Name] + if m.Type == metric.Equal && value == m.Value { + matcher, err := metric.NewLabelMatcher(metric.Equal, m.Name, "") + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + filteredMatchers = append(filteredMatchers, matcher) + } else { + filteredMatchers = append(filteredMatchers, m) + } + } + + iters, err := querier.QueryRange(r.Context(), from, through, filteredMatchers...) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -496,7 +515,6 @@ func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { OldestInclusive: from, NewestInclusive: through, })) - externalLabels := api.config().GlobalConfig.ExternalLabels.Clone() for _, ts := range resp.Results[i].Timeseries { globalUsed := map[string]struct{}{} for _, l := range ts.Labels { diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 6b5e9b033f..8be5e84960 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -560,6 +560,7 @@ func TestReadEndpoint(t *testing.T) { ExternalLabels: model.LabelSet{ "baz": "a", "b": "c", + "d": "e", }, }, } @@ -567,11 +568,15 @@ func TestReadEndpoint(t *testing.T) { } // Encode the request. - matcher, err := metric.NewLabelMatcher(metric.Equal, "__name__", "test_metric1") + matcher1, err := metric.NewLabelMatcher(metric.Equal, "__name__", "test_metric1") if err != nil { t.Fatal(err) } - query, err := remote.ToQuery(0, 1, metric.LabelMatchers{matcher}) + matcher2, err := metric.NewLabelMatcher(metric.Equal, "d", "e") + if err != nil { + t.Fatal(err) + } + query, err := remote.ToQuery(0, 1, metric.LabelMatchers{matcher1, matcher2}) if err != nil { t.Fatal(err) } @@ -611,7 +616,7 @@ func TestReadEndpoint(t *testing.T) { result := remote.FromQueryResult(resp.Results[0]) expected := &model.Matrix{ &model.SampleStream{ - Metric: model.Metric{"__name__": "test_metric1", "b": "c", "baz": "qux", "foo": "bar"}, + Metric: model.Metric{"__name__": "test_metric1", "b": "c", "d": "e", "baz": "qux", "foo": "bar"}, Values: []model.SamplePair{model.SamplePair{Value: 1, Timestamp: 0}}, }, } From 53a5f522244ead65eefc2abadd1f07555f281d1c Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Tue, 10 Oct 2017 14:58:52 +0200 Subject: [PATCH 05/11] Import first batch of Prometheus documentation In order to provide documentation for each individual version, this commit starts moving Prometheus server specific documentation into the repository itself. --- docs/configuration.md | 1141 +++++++++++++++++++++++++++++++++++++++ docs/getting_started.md | 275 ++++++++++ docs/index.md | 16 + docs/installation.md | 96 ++++ 4 files changed, 1528 insertions(+) create mode 100644 docs/configuration.md create mode 100644 docs/getting_started.md create mode 100644 docs/index.md create mode 100644 docs/installation.md diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000000..67f214a02c --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,1141 @@ +--- +title: Configuration +sort_rank: 20 +--- + +# Configuration + +Prometheus is configured via command-line flags and a configuration file. 
While +the command-line flags configure immutable system parameters (such as storage +locations, amount of data to keep on disk and in memory, etc.), the +configuration file defines everything related to scraping [jobs and their +instances](https://prometheus.io/docs/concepts/jobs_instances/), as well as +which [rule files to load](querying/rules.md#configuring-rules). + +To view all available command-line flags, run `prometheus -h`. + +Prometheus can reload its configuration at runtime. If the new configuration +is not well-formed, the changes will not be applied. +A configuration reload is triggered by sending a `SIGHUP` to the Prometheus process or +sending a HTTP POST request to the `/-/reload` endpoint. +This will also reload any configured rule files. + +## Configuration file + +To specify which configuration file to load, use the `-config.file` flag. + +The file is written in [YAML format](http://en.wikipedia.org/wiki/YAML), +defined by the scheme described below. +Brackets indicate that a parameter is optional. For non-list parameters the +value is set to the specified default. + +Generic placeholders are defined as follows: + +* ``: a boolean that can take the values `true` or `false` +* ``: a duration matching the regular expression `[0-9]+(ms|[smhdwy])` +* ``: a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` +* ``: a string of unicode characters +* ``: a valid path in the current working directory +* ``: a valid string consisting of a hostname or IP followed by an optional port number +* ``: a valid URL path +* ``: a string that can take the values `http` or `https` +* ``: a regular string +* ``: a regular string that is a secret, such as a password + +The other placeholders are specified separately. + +A valid example file can be found [here](/config/testdata/conf.good.yml). + +The global configuration specifies parameters that are valid in all other configuration +contexts. They also serve as defaults for other configuration sections. + +```yaml +global: + # How frequently to scrape targets by default. + [ scrape_interval: | default = 1m ] + + # How long until a scrape request times out. + [ scrape_timeout: | default = 10s ] + + # How frequently to evaluate rules. + [ evaluation_interval: | default = 1m ] + + # The labels to add to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + [ : ... ] + +# Rule files specifies a list of globs. Rules and alerts are read from +# all matching files. +rule_files: + [ - ... ] + +# A list of scrape configurations. +scrape_configs: + [ - ... ] + +# Alerting specifies settings related to the Alertmanager. +alerting: + alert_relabel_configs: + [ - ... ] + alertmanagers: + [ - ... ] + +# Settings related to the experimental remote write feature. +remote_write: + [ - ... ] + +# Settings related to the experimental remote read feature. +remote_read: + [ - ... ] +``` + +### `` + +A `scrape_config` section specifies a set of targets and parameters describing how +to scrape them. In the general case, one scrape configuration specifies a single +job. In advanced configurations, this may change. + +Targets may be statically configured via the `static_configs` parameter or +dynamically discovered using one of the supported service-discovery mechanisms. + +Additionally, `relabel_configs` allow advanced modifications to any +target and its labels before scraping. + +```yaml +# The job name assigned to scraped metrics by default. 
+job_name: + +# How frequently to scrape targets from this job. +[ scrape_interval: | default = ] + +# Per-scrape timeout when scraping this job. +[ scrape_timeout: | default = ] + +# The HTTP resource path on which to fetch metrics from targets. +[ metrics_path: | default = /metrics ] + +# honor_labels controls how Prometheus handles conflicts between labels that are +# already present in scraped data and labels that Prometheus would attach +# server-side ("job" and "instance" labels, manually configured target +# labels, and labels generated by service discovery implementations). +# +# If honor_labels is set to "true", label conflicts are resolved by keeping label +# values from the scraped data and ignoring the conflicting server-side labels. +# +# If honor_labels is set to "false", label conflicts are resolved by renaming +# conflicting labels in the scraped data to "exported_" (for +# example "exported_instance", "exported_job") and then attaching server-side +# labels. This is useful for use cases such as federation, where all labels +# specified in the target should be preserved. +# +# Note that any globally configured "external_labels" are unaffected by this +# setting. In communication with external systems, they are always applied only +# when a time series does not have a given label yet and are ignored otherwise. +[ honor_labels: | default = false ] + +# Configures the protocol scheme used for requests. +[ scheme: | default = http ] + +# Optional HTTP URL parameters. +params: + [ : [, ...] ] + +# Sets the `Authorization` header on every scrape request with the +# configured username and password. +basic_auth: + [ username: ] + [ password: ] + +# Sets the `Authorization` header on every scrape request with +# the configured bearer token. It is mutually exclusive with `bearer_token_file`. +[ bearer_token: ] + +# Sets the `Authorization` header on every scrape request with the bearer token +# read from the configured file. It is mutually exclusive with `bearer_token`. +[ bearer_token_file: /path/to/bearer/token/file ] + +# Configures the scrape request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] + +# List of Azure service discovery configurations. +azure_sd_configs: + [ - ... ] + +# List of Consul service discovery configurations. +consul_sd_configs: + [ - ... ] + +# List of DNS service discovery configurations. +dns_sd_configs: + [ - ... ] + +# List of EC2 service discovery configurations. +ec2_sd_configs: + [ - ... ] + +# List of OpenStack service discovery configurations. +openstack_sd_configs: + [ - ... ] + +# List of file service discovery configurations. +file_sd_configs: + [ - ... ] + +# List of GCE service discovery configurations. +gce_sd_configs: + [ - ... ] + +# List of Kubernetes service discovery configurations. +kubernetes_sd_configs: + [ - ... ] + +# List of Marathon service discovery configurations. +marathon_sd_configs: + [ - ... ] + +# List of AirBnB's Nerve service discovery configurations. +nerve_sd_configs: + [ - ... ] + +# List of Zookeeper Serverset service discovery configurations. +serverset_sd_configs: + [ - ... ] + +# List of Triton service discovery configurations. +triton_sd_configs: + [ - ... ] + +# List of labeled statically configured targets for this job. +static_configs: + [ - ... ] + +# List of target relabel configurations. +relabel_configs: + [ - ... ] + +# List of metric relabel configurations. +metric_relabel_configs: + [ - ... ] + +# Per-scrape limit on number of scraped samples that will be accepted. 
+# If more than this number of samples are present after metric relabelling +# the entire scrape will be treated as failed. 0 means no limit. +[ sample_limit: | default = 0 ] +``` + +Where `` must be unique across all scrape configurations. + +### `` + +A `tls_config` allows configuring TLS connections. + +```yaml +# CA certificate to validate API server certificate with. +[ ca_file: ] + +# Certificate and key files for client cert authentication to the server. +[ cert_file: ] +[ key_file: ] + +# ServerName extension to indicate the name of the server. +# http://tools.ietf.org/html/rfc4366#section-3.1 +[ server_name: ] + +# Disable validation of the server certificate. +[ insecure_skip_verify: ] +``` + +### `` + +CAUTION: Azure SD is in beta: breaking changes to configuration are still +likely in future releases. + +Azure SD configurations allow retrieving scrape targets from Azure VMs. + +The following meta labels are available on targets during relabeling: + +* `__meta_azure_machine_id`: the machine ID +* `__meta_azure_machine_location`: the location the machine runs in +* `__meta_azure_machine_name`: the machine name +* `__meta_azure_machine_private_ip`: the machine's private IP +* `__meta_azure_machine_resource_group`: the machine's resource group +* `__meta_azure_machine_tag_`: each tag value of the machine + +See below for the configuration options for Azure discovery: + +```yaml +# The information to access the Azure API. +# The subscription ID. +subscription_id: +# The tenant ID. +tenant_id: +# The client ID. +client_id: +# The client secret. +client_secret: + +# Refresh interval to re-read the instance list. +[ refresh_interval: | default = 300s ] + +# The port to scrape metrics from. If using the public IP address, this must +# instead be specified in the relabeling rule. +[ port: | default = 80 ] +``` + +### `` + +Consul SD configurations allow retrieving scrape targets from [Consul's](https://www.consul.io) +Catalog API. + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_consul_address`: the address of the target +* `__meta_consul_dc`: the datacenter name for the target +* `__meta_consul_node`: the node name defined for the target +* `__meta_consul_service_address`: the service address of the target +* `__meta_consul_service_id`: the service ID of the target +* `__meta_consul_service_port`: the service port of the target +* `__meta_consul_service`: the name of the service the target belongs to +* `__meta_consul_tags`: the list of tags of the target joined by the tag separator + +```yaml +# The information to access the Consul API. It is to be defined +# as the Consul documentation requires. +server: +[ token: ] +[ datacenter: ] +[ scheme: ] +[ username: ] +[ password: ] + +# A list of services for which targets are retrieved. If omitted, all services +# are scraped. +services: + [ - ] + +# The string by which Consul tags are joined into the tag label. +[ tag_separator: | default = , ] +``` + +Note that the IP number and port used to scrape the targets is assembled as +`<__meta_consul_address>:<__meta_consul_service_port>`. However, in some +Consul setups, the relevant address is in `__meta_consul_service_address`. +In those cases, you can use the [relabel](#relabel_config) +feature to replace the special `__address__` label. + +### `` + +A DNS-based service discovery configuration allows specifying a set of DNS +domain names which are periodically queried to discover a list of targets. 
The +DNS servers to be contacted are read from `/etc/resolv.conf`. + +This service discovery method only supports basic DNS A, AAAA and SRV record +queries, but not the advanced DNS-SD approach specified in +[RFC6763](https://tools.ietf.org/html/rfc6763). + +During the [relabeling phase](#relabel_config), the meta label +`__meta_dns_name` is available on each target and is set to the +record name that produced the discovered target. + +```yaml +# A list of DNS domain names to be queried. +names: + [ - ] + +# The type of DNS query to perform. +[ type: | default = 'SRV' ] + +# The port number used if the query type is not SRV. +[ port: ] + +# The time after which the provided names are refreshed. +[ refresh_interval: | default = 30s ] +``` + +Where `` is a valid DNS domain name. +Where `` is `SRV`, `A`, or `AAAA`. + +### `` + +EC2 SD configurations allow retrieving scrape targets from AWS EC2 +instances. The private IP address is used by default, but may be changed to +the public IP address with relabeling. + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_ec2_availability_zone`: the availability zone in which the instance is running +* `__meta_ec2_instance_id`: the EC2 instance ID +* `__meta_ec2_instance_state`: the state of the EC2 instance +* `__meta_ec2_instance_type`: the type of the EC2 instance +* `__meta_ec2_private_ip`: the private IP address of the instance, if present +* `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available +* `__meta_ec2_public_ip`: the public IP address of the instance, if available +* `__meta_ec2_subnet_id`: comma separated list of subnets IDs in which the instance is running, if available +* `__meta_ec2_tag_`: each tag value of the instance +* `__meta_ec2_vpc_id`: the ID of the VPC in which the instance is running, if available + +See below for the configuration options for EC2 discovery: + +```yaml +# The information to access the EC2 API. + +# The AWS Region. +region: + +# The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` +# and `AWS_SECRET_ACCESS_KEY` are used. +[ access_key: ] +[ secret_key: ] +# Named AWS profile used to connect to the API. +[ profile: ] + +# Refresh interval to re-read the instance list. +[ refresh_interval: | default = 60s ] + +# The port to scrape metrics from. If using the public IP address, this must +# instead be specified in the relabeling rule. +[ port: | default = 80 ] +``` + +### `` + +CAUTION: OpenStack SD is in beta: breaking changes to configuration are still +likely in future releases. + +OpenStack SD configurations allow retrieving scrape targets from OpenStack Nova +instances. + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_openstack_instance_id`: the OpenStack instance ID +* `__meta_openstack_instance_name`: the OpenStack instance name +* `__meta_openstack_instance_status`: the status of the OpenStack instance +* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance +* `__meta_openstack_public_ip`: the public IP of the OpenStack instance +* `__meta_openstack_private_ip`: the private IP of the OpenStack instance +* `__meta_openstack_tag_`: each tag value of the instance + +See below for the configuration options for OpenStack discovery: + +```yaml +# The information to access the OpenStack API. + +# The OpenStack Region. 
+region: + +# identity_endpoint specifies the HTTP endpoint that is required to work with +# the Identity API of the appropriate version. While it's ultimately needed by +# all of the identity services, it will often be populated by a provider-level +# function. +[ identity_endpoint: ] + +# username is required if using Identity V2 API. Consult with your provider's +# control panel to discover your account's username. In Identity V3, either +# userid or a combination of username and domain_id or domain_name are needed. +[ username: ] +[ userid: ] + +# password for the Identity V2 and V3 APIs. Consult with your provider's +# control panel to discover your account's preferred method of authentication. +[ password: ] + +# At most one of domain_id and domain_name must be provided if using username +# with Identity V3. Otherwise, either are optional. +[ domain_name: ] +[ domain_id: ] + +# The project_id and project_name fields are optional for the Identity V2 API. +# Some providers allow you to specify a project_name instead of the project_id. +# Some require both. Your provider's authentication policies will determine +# how these fields influence authentication. +[ project_name: ] +[ project_id: ] + +# Refresh interval to re-read the instance list. +[ refresh_interval: | default = 60s ] + +# The port to scrape metrics from. If using the public IP address, this must +# instead be specified in the relabeling rule. +[ port: | default = 80 ] +``` + +### `` + +File-based service discovery provides a more generic way to configure static targets +and serves as an interface to plug in custom service discovery mechanisms. + +It reads a set of files containing a list of zero or more +``s. Changes to all defined files are detected via disk watches +and applied immediately. Files may be provided in YAML or JSON format. Only +changes resulting in well-formed target groups are applied. + +The JSON file must contain a list of static configs, using this format: + +```yaml +[ + { + "targets": [ "", ... ], + "labels": { + "": "", ... + } + }, + ... +] +``` + +As a fallback, the file contents are also re-read periodically at the specified +refresh interval. + +Each target has a meta label `__meta_filepath` during the +[relabeling phase](#relabel_config). Its value is set to the +filepath from which the target was extracted. + +There is a list of +[integrations](/docs/operating/configuration/#) with this +discovery mechanism. + +```yaml +# Patterns for files from which target groups are extracted. +files: + [ - ... ] + +# Refresh interval to re-read the files. +[ refresh_interval: | default = 5m ] +``` + +Where `` may be a path ending in `.json`, `.yml` or `.yaml`. The last path segment +may contain a single `*` that matches any character sequence, e.g. `my/path/tg_*.json`. + +### `` + +CAUTION: GCE SD is in beta: breaking changes to configuration are still +likely in future releases. + +[GCE](https://cloud.google.com/compute/) SD configurations allow retrieving scrape targets from GCP GCE instances. +The private IP address is used by default, but may be changed to the public IP +address with relabeling. 
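For instance, a relabel rule along the following lines (illustrative only; the scrape port `9100` is an arbitrary example) rewrites the target address to the instance's public IP whenever one is present:

```yaml
relabel_configs:
  # If the instance has a public IP, scrape that address instead of the
  # default private IP. The regex (.+) ensures the rule only fires when
  # __meta_gce_public_ip is non-empty.
  - source_labels: [__meta_gce_public_ip]
    regex: (.+)
    target_label: __address__
    replacement: '${1}:9100'
```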
+ +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_gce_instance_name`: the name of the instance +* `__meta_gce_metadata_`: each metadata item of the instance +* `__meta_gce_network`: the network URL of the instance +* `__meta_gce_private_ip`: the private IP address of the instance +* `__meta_gce_project`: the GCP project in which the instance is running +* `__meta_gce_public_ip`: the public IP address of the instance, if present +* `__meta_gce_subnetwork`: the subnetwork URL of the instance +* `__meta_gce_tags`: comma separated list of instance tags +* `__meta_gce_zone`: the GCE zone URL in which the instance is running + +See below for the configuration options for GCE discovery: + +```yaml +# The information to access the GCE API. + +# The GCP Project +project: + +# The zone of the scrape targets. If you need multiple zones use multiple +# gce_sd_configs. +zone: + +# Filter can be used optionally to filter the instance list by other criteria +[ filter: ] + +# Refresh interval to re-read the instance list +[ refresh_interval: | default = 60s ] + +# The port to scrape metrics from. If using the public IP address, this must +# instead be specified in the relabeling rule. +[ port: | default = 80 ] + +# The tag separator is used to separate the tags on concatenation +[ tag_separator: | default = , ] +``` + +Credentials are discovered by the Google Cloud SDK default client by looking +in the following places, preferring the first location found: + +1. a JSON file specified by the `GOOGLE_APPLICATION_CREDENTIALS` environment variable +2. a JSON file in the well-known path `$HOME/.config/gcloud/application_default_credentials.json` +3. fetched from the GCE metadata server + +If Prometheus is running within GCE, the service account associated with the +instance it is running on should have at least read-only permissions to the +compute resources. If running outside of GCE make sure to create an appropriate +service account and place the credential file in one of the expected locations. + +### `` + +CAUTION: Kubernetes SD is in beta: breaking changes to configuration are still +likely in future releases. + +Kubernetes SD configurations allow retrieving scrape targets from +[Kubernetes'](http://kubernetes.io/) REST API and always staying synchronized with +the cluster state. + +One of the following `role` types can be configured to discover targets: + +#### `node` + +The `node` role discovers one target per cluster node with the address defaulting +to the Kubelet's HTTP port. +The target address defaults to the first existing address of the Kubernetes +node object in the address type order of `NodeInternalIP`, `NodeExternalIP`, +`NodeLegacyHostIP`, and `NodeHostName`. + +Available meta labels: + +* `__meta_kubernetes_node_name`: The name of the node object. +* `__meta_kubernetes_node_label_`: Each label from the node object. +* `__meta_kubernetes_node_annotation_`: Each annotation from the node object. +* `__meta_kubernetes_node_address_`: The first address for each node address type, if it exists. + +In addition, the `instance` label for the node will be set to the node name +as retrieved from the API server. + +#### `service` + +The `service` role discovers a target for each service port for each service. +This is generally useful for blackbox monitoring of a service. +The address will be set to the Kubernetes DNS name of the service and respective +service port. 
+ +Available meta labels: + +* `__meta_kubernetes_namespace`: The namespace of the service object. +* `__meta_kubernetes_service_name`: The name of the service object. +* `__meta_kubernetes_service_label_`: The label of the service object. +* `__meta_kubernetes_service_annotation_`: The annotation of the service object. +* `__meta_kubernetes_service_port_name`: Name of the service port for the target. +* `__meta_kubernetes_service_port_number`: Number of the service port for the target. +* `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target. + +#### `pod` + +The `pod` role discovers all pods and exposes their containers as targets. For each declared +port of a container, a single target is generated. If a container has no specified ports, +a port-free target per container is created for manually adding a port via relabeling. + +Available meta labels: + +* `__meta_kubernetes_namespace`: The namespace of the pod object. +* `__meta_kubernetes_pod_name`: The name of the pod object. +* `__meta_kubernetes_pod_ip`: The pod IP of the pod object. +* `__meta_kubernetes_pod_label_`: The label of the pod object. +* `__meta_kubernetes_pod_annotation_`: The annotation of the pod object. +* `__meta_kubernetes_pod_container_name`: Name of the container the target address points to. +* `__meta_kubernetes_pod_container_port_name`: Name of the container port. +* `__meta_kubernetes_pod_container_port_number`: Number of the container port. +* `__meta_kubernetes_pod_container_port_protocol`: Protocol of the container port. +* `__meta_kubernetes_pod_ready`: Set to `true` or `false` for the pod's ready state. +* `__meta_kubernetes_pod_node_name`: The name of the node the pod is scheduled onto. +* `__meta_kubernetes_pod_host_ip`: The current host IP of the pod object. + +#### `endpoints` + +The `endpoints` role discovers targets from listed endpoints of a service. For each endpoint +address one target is discovered per port. If the endpoint is backed by a pod, all +additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well. + +Available meta labels: + +* `__meta_kubernetes_namespace`: The namespace of the endpoints object. +* `__meta_kubernetes_endpoints_name`: The names of the endpoints object. +* For all targets discovered directly from the endpoints list (those not additionally inferred + from underlying pods), the following labels are attached: + * `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state. + * `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port. + * `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port. +* If the endpoints belong to a service, all labels of the `role: service` discovery are attached. +* For all targets backed by a pod, all labels of the `role: pod` discovery are attached. + +See below for the configuration options for Kubernetes discovery: + +```yaml +# The information to access the Kubernetes API. + +# The API server addresses. If left empty, Prometheus is assumed to run inside +# of the cluster and will discover API servers automatically and use the pod's +# CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/. +[ api_server: ] + +# The Kubernetes role of entities that should be discovered. +role: + +# Optional authentication information used to authenticate to the API server. +# Note that `basic_auth`, `bearer_token` and `bearer_token_file` options are +# mutually exclusive. 
+ +# Optional HTTP basic authentication information. +basic_auth: + [ username: ] + [ password: ] + +# Optional bearer token authentication information. +[ bearer_token: ] + +# Optional bearer token file authentication information. +[ bearer_token_file: ] + +# TLS configuration. +tls_config: + [ ] + +# Optional namespace discovery. If omitted, all namespaces are used. +namespaces: + names: + [ - ] +``` + +Where `` must be `endpoints`, `service`, `pod`, or `node`. + +See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml) +for a detailed example of configuring Prometheus for Kubernetes. + +You may wish to check out the 3rd party [Prometheus Operator](https://github.com/coreos/prometheus-operator), +which automates the Prometheus setup on top of Kubernetes. + +### `` + +CAUTION: Marathon SD is in beta: breaking changes to configuration are still +likely in future releases. + +Marathon SD configurations allow retrieving scrape targets using the +[Marathon](https://mesosphere.github.io/marathon/) REST API. Prometheus +will periodically check the REST endpoint for currently running tasks and +create a target group for every app that has at least one healthy task. + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_marathon_app`: the name of the app (with slashes replaced by dashes) +* `__meta_marathon_image`: the name of the Docker image used (if available) +* `__meta_marathon_task`: the ID of the Mesos task +* `__meta_marathon_app_label_`: any Marathon labels attached to the app +* `__meta_marathon_port_definition_label_`: the port definition labels +* `__meta_marathon_port_mapping_label_`: the port mapping labels + +See below for the configuration options for Marathon discovery: + +```yaml +# List of URLs to be used to contact Marathon servers. +# You need to provide at least one server URL, but should provide URLs for +# all masters you have running. +servers: + - + +# Optional bearer token authentication information. +# It is mutually exclusive with `bearer_token_file`. +[ bearer_token: ] + +# Optional bearer token file authentication information. +# It is mutually exclusive with `bearer_token`. +[ bearer_token_file: ] + +# Polling interval +[ refresh_interval: | default = 30s ] +``` + +By default every app listed in Marathon will be scraped by Prometheus. If not all +of your services provide Prometheus metrics, you can use a Marathon label and +Prometheus relabeling to control which instances will actually be scraped. Also +by default all apps will show up as a single job in Prometheus (the one specified +in the configuration file), which can also be changed using relabeling. + +### `` + +Nerve SD configurations allow retrieving scrape targets from [AirBnB's +Nerve](https://github.com/airbnb/nerve) which are stored in +[Zookeeper](https://zookeeper.apache.org/). + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_nerve_path`: the full path to the endpoint node in Zookeeper +* `__meta_nerve_endpoint_host`: the host of the endpoint +* `__meta_nerve_endpoint_port`: the port of the endpoint +* `__meta_nerve_endpoint_name`: the name of the endpoint + +```yaml +# The Zookeeper servers. +servers: + - +# Paths can point to a single service, or the root of a tree of services. 
+paths: + - +[ timeout: | default = 10s ] +``` + +### `` + +Serverset SD configurations allow retrieving scrape targets from +[Serversets](https://github.com/twitter/finagle/tree/master/finagle-serversets) +which are stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are +commonly used by [Finagle](https://twitter.github.io/finagle/) and +[Aurora](http://aurora.apache.org/). + +The following meta labels are available on targets during relabeling: + +* `__meta_serverset_path`: the full path to the serverset member node in Zookeeper +* `__meta_serverset_endpoint_host`: the host of the default endpoint +* `__meta_serverset_endpoint_port`: the port of the default endpoint +* `__meta_serverset_endpoint_host_`: the host of the given endpoint +* `__meta_serverset_endpoint_port_`: the port of the given endpoint +* `__meta_serverset_shard`: the shard number of the member +* `__meta_serverset_status`: the status of the member + +```yaml +# The Zookeeper servers. +servers: + - +# Paths can point to a single serverset, or the root of a tree of serversets. +paths: + - +[ timeout: | default = 10s ] +``` + +Serverset data must be in the JSON format, the Thrift format is not currently supported. + +### `` + +CAUTION: Triton SD is in beta: breaking changes to configuration are still +likely in future releases. + +[Triton](https://github.com/joyent/triton) SD configurations allow retrieving +scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md) +discovery endpoints. + +The following meta labels are available on targets during relabeling: + +* `__meta_triton_machine_id`: the UUID of the target container +* `__meta_triton_machine_alias`: the alias of the target container +* `__meta_triton_machine_image`: the target containers image type +* `__meta_triton_machine_server_id`: the server UUID for the target container + +```yaml +# The information to access the Triton discovery API. + +# The account to use for discovering new target containers. +account: + +# The DNS suffix which should be applied to target containers. +dns_suffix: + +# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is +# often the same value as dns_suffix. +endpoint: + +# The port to use for discovery and metric scraping. +[ port: | default = 9163 ] + +# The interval which should should be used for refreshing target containers. +[ refresh_interval: | default = 60s ] + +# The Triton discovery API version. +[ version: | default = 1 ] + +# TLS configuration. +tls_config: + [ ] +``` + +### `` + +A `static_config` allows specifying a list of targets and a common label set +for them. It is the canonical way to specify static targets in a scrape +configuration. + +```yaml +# The targets specified by the static config. +targets: + [ - '' ] + +# Labels assigned to all metrics scraped from the targets. +labels: + [ : ... ] +``` + +### `` + +Relabeling is a powerful tool to dynamically rewrite the label set of a target before +it gets scraped. Multiple relabeling steps can be configured per scrape configuration. +They are applied to the label set of each target in order of their appearance +in the configuration file. + +Initially, aside from the configured per-target labels, a target's `job` +label is set to the `job_name` value of the respective scrape configuration. +The `__address__` label is set to the `:` address of the target. +After relabeling, the `instance` label is set to the value of `__address__` by default if +it was not set during relabeling. 
The `__scheme__` and `__metrics_path__` labels +are set to the scheme and metrics path of the target respectively. The `__param_` +label is set to the value of the first passed URL parameter called ``. + +Additional labels prefixed with `__meta_` may be available during the +relabeling phase. They are set by the service discovery mechanism that provided +the target and vary between mechanisms. + +Labels starting with `__` will be removed from the label set after relabeling is completed. + +If a relabeling step needs to store a label value only temporarily (as the +input to a subsequent relabeling step), use the `__tmp` label name prefix. This +prefix is guaranteed to never be used by Prometheus itself. + +```yaml +# The source labels select values from existing labels. Their content is concatenated +# using the configured separator and matched against the configured regular expression +# for the replace, keep, and drop actions. +[ source_labels: '[' [, ...] ']' ] + +# Separator placed between concatenated source label values. +[ separator: | default = ; ] + +# Label to which the resulting value is written in a replace action. +# It is mandatory for replace actions. Regex capture groups are available. +[ target_label: ] + +# Regular expression against which the extracted value is matched. +[ regex: | default = (.*) ] + +# Modulus to take of the hash of the source label values. +[ modulus: ] + +# Replacement value against which a regex replace is performed if the +# regular expression matches. Regex capture groups are available. +[ replacement: | default = $1 ] + +# Action to perform based on regex matching. +[ action: | default = replace ] +``` + +`` is any valid +[RE2 regular expression](https://github.com/google/re2/wiki/Syntax). It is +required for the `replace`, `keep`, `drop`, `labelmap`,`labeldrop` and `labelkeep` actions. The regex is +anchored on both ends. To un-anchor the regex, use `.*.*`. + +`` determines the relabeling action to take: + +* `replace`: Match `regex` against the concatenated `source_labels`. Then, set + `target_label` to `replacement`, with match group references + (`${1}`, `${2}`, ...) in `replacement` substituted by their value. If `regex` + does not match, no replacement takes place. +* `keep`: Drop targets for which `regex` does not match the concatenated `source_labels`. +* `drop`: Drop targets for which `regex` matches the concatenated `source_labels`. +* `hashmod`: Set `target_label` to the `modulus` of a hash of the concatenated `source_labels`. +* `labelmap`: Match `regex` against all label names. Then copy the values of the matching labels + to label names given by `replacement` with match group references + (`${1}`, `${2}`, ...) in `replacement` substituted by their value. +* `labeldrop`: Match `regex` against all label names. Any label that matches will be + removed from the set of labels. +* `labelkeep`: Match `regex` against all label names. Any label that does not match will be + removed from the set of labels. + +Care must be taken with `labeldrop` and `labelkeep` to ensure that metrics are still uniquely labeled +once the labels are removed. + +### `` + +Metric relabeling is applied to samples as the last step before ingestion. It +has the same configuration format and actions as target relabeling. Metric +relabeling does not apply to automatically generated timeseries such as `up`. + +One use for this is to blacklist time series that are too expensive to ingest. 
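For example, a rule along these lines (the metric name pattern is purely illustrative) drops every sample whose metric name matches the given regex before it is ingested:

```yaml
metric_relabel_configs:
  # Drop all series whose metric name matches the expensive pattern.
  - source_labels: [__name__]
    regex: 'expensive_histogram_.*'
    action: drop
```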
+ +### `` + +Alert relabeling is applied to alerts before they are sent to the Alertmanager. +It has the same configuration format and actions as target relabeling. Alert +relabeling is applied after external labels. + +One use for this is ensuring a HA pair of Prometheus servers with different +external labels send identical alerts. + +### `` + +CAUTION: Dynamic discovery of Alertmanager instances is in alpha state. Breaking configuration +changes may happen in future releases. Use static configuration via the `-alertmanager.url` flag +as a stable alternative. + +An `alertmanager_config` section specifies Alertmanager instances the Prometheus server sends +alerts to. It also provides parameters to configure how to communicate with these Alertmanagers. + +Alertmanagers may be statically configured via the `static_configs` parameter or +dynamically discovered using one of the supported service-discovery mechanisms. + +Additionally, `relabel_configs` allow selecting Alertmanagers from discovered +entities and provide advanced modifications to the used API path, which is exposed +through the `__alerts_path__` label. + +```yaml +# Per-target Alertmanager timeout when pushing alerts. +[ timeout: | default = 10s ] + +# Prefix for the HTTP path alerts are pushed to. +[ path_prefix: | default = / ] + +# Configures the protocol scheme used for requests. +[ scheme: | default = http ] + +# Sets the `Authorization` header on every request with the +# configured username and password. +basic_auth: + [ username: ] + [ password: ] + +# Sets the `Authorization` header on every request with +# the configured bearer token. It is mutually exclusive with `bearer_token_file`. +[ bearer_token: ] + +# Sets the `Authorization` header on every request with the bearer token +# read from the configured file. It is mutually exclusive with `bearer_token`. +[ bearer_token_file: /path/to/bearer/token/file ] + +# Configures the scrape request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] + +# List of Azure service discovery configurations. +azure_sd_configs: + [ - ... ] + +# List of Consul service discovery configurations. +consul_sd_configs: + [ - ... ] + +# List of DNS service discovery configurations. +dns_sd_configs: + [ - ... ] + +# List of EC2 service discovery configurations. +ec2_sd_configs: + [ - ... ] + +# List of file service discovery configurations. +file_sd_configs: + [ - ... ] + +# List of GCE service discovery configurations. +gce_sd_configs: + [ - ... ] + +# List of Kubernetes service discovery configurations. +kubernetes_sd_configs: + [ - ... ] + +# List of Marathon service discovery configurations. +marathon_sd_configs: + [ - ... ] + +# List of AirBnB's Nerve service discovery configurations. +nerve_sd_configs: + [ - ... ] + +# List of Zookeeper Serverset service discovery configurations. +serverset_sd_configs: + [ - ... ] + +# List of Triton service discovery configurations. +triton_sd_configs: + [ - ... ] + +# List of labeled statically configured Alertmanagers. +static_configs: + [ - ... ] + +# List of Alertmanager relabel configurations. +relabel_configs: + [ - ... ] +``` + +### `` + +CAUTION: Remote write is experimental: breaking changes to configuration are +likely in future releases. + +`write_relabel_configs` is relabeling applied to samples before sending them +to the remote endpoint. Write relabeling is applied after external labels. This +could be used to limit which samples are sent. 
+ +There is a [small demo](/documentation/examples/remote_storage) of how to use +this functionality. + +```yaml +# The URL of the endpoint to send samples to. +url: + +# Timeout for requests to the remote write endpoint. +[ remote_timeout: | default = 30s ] + +# List of remote write relabel configurations. +write_relabel_configs: + [ - ... ] + +# Sets the `Authorization` header on every remote write request with the +# configured username and password. +basic_auth: + [ username: ] + [ password: ] + +# Sets the `Authorization` header on every remote write request with +# the configured bearer token. It is mutually exclusive with `bearer_token_file`. +[ bearer_token: ] + +# Sets the `Authorization` header on every remote write request with the bearer token +# read from the configured file. It is mutually exclusive with `bearer_token`. +[ bearer_token_file: /path/to/bearer/token/file ] + +# Configures the remote write request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] +``` + +There is a list of +[integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) +with this feature. + +### `` + +CAUTION: Remote read is experimental: breaking changes to configuration are +likely in future releases. + +```yaml +# The URL of the endpoint to query from. +url: + +# Timeout for requests to the remote read endpoint. +[ remote_timeout: | default = 30s ] + +# Sets the `Authorization` header on every remote read request with the +# configured username and password. +basic_auth: + [ username: ] + [ password: ] + +# Sets the `Authorization` header on every remote read request with +# the configured bearer token. It is mutually exclusive with `bearer_token_file`. +[ bearer_token: ] + +# Sets the `Authorization` header on every remote read request with the bearer token +# read from the configured file. It is mutually exclusive with `bearer_token`. +[ bearer_token_file: /path/to/bearer/token/file ] + +# Configures the remote read request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] +``` + +There is a list of +[integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) +with this feature. diff --git a/docs/getting_started.md b/docs/getting_started.md new file mode 100644 index 0000000000..8585e25664 --- /dev/null +++ b/docs/getting_started.md @@ -0,0 +1,275 @@ +--- +title: Getting started +sort_rank: 10 +--- + +# Getting started + +This guide is a "Hello World"-style tutorial which shows how to install, +configure, and use Prometheus in a simple example setup. You will download and run +Prometheus locally, configure it to scrape itself and an example application, +and then work with queries, rules, and graphs to make use of the collected time +series data. + +## Downloading and running Prometheus + +[Download the latest release](https://prometheus.io/download) of Prometheus for +your platform, then extract and run it: + +```bash +tar xvfz prometheus-*.tar.gz +cd prometheus-* +``` + +Before starting Prometheus, let's configure it. + +## Configuring Prometheus to monitor itself + +Prometheus collects metrics from monitored targets by scraping metrics HTTP +endpoints on these targets. Since Prometheus also exposes data in the same +manner about itself, it can also scrape and monitor its own health. + +While a Prometheus server that collects only data about itself is not very +useful in practice, it is a good starting example. 
Save the following basic +Prometheus configuration as a file named `prometheus.yml`: + +```yaml +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'prometheus' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:9090'] +``` + +For a complete specification of configuration options, see the +[configuration documentation](configuration.md). + +## Starting Prometheus + +To start Prometheus with your newly created configuration file, change to your +Prometheus build directory and run: + +```bash +# Start Prometheus. +# By default, Prometheus stores its database in ./data (flag -storage.local.path). +./prometheus -config.file=prometheus.yml +``` + +Prometheus should start up and it should show a status page about itself at +[localhost:9090](http://localhost:9090). Give it a couple of seconds to collect +data about itself from its own HTTP metrics endpoint. + +You can also verify that Prometheus is serving metrics about itself by +navigating to its metrics endpoint: +[localhost:9090/metrics](http://localhost:9090/metrics) + +The number of OS threads executed by Prometheus is controlled by the +`GOMAXPROCS` environment variable. As of Go 1.5 the default value is +the number of cores available. + +Blindly setting `GOMAXPROCS` to a high value can be +counterproductive. See the relevant [Go +FAQs](http://golang.org/doc/faq#Why_no_multi_CPU). + +Note that Prometheus by default uses around 3GB in memory. If you have a +smaller machine, you can tune Prometheus to use less memory. For details, +see the [memory usage documentation](storage.md#memory-usage). + +## Using the expression browser + +Let us try looking at some data that Prometheus has collected about itself. To +use Prometheus's built-in expression browser, navigate to +http://localhost:9090/graph and choose the "Console" view within the "Graph" +tab. + +As you can gather from http://localhost:9090/metrics, one metric that +Prometheus exports about itself is called +`prometheus_target_interval_length_seconds` (the actual amount of time between +target scrapes). Go ahead and enter this into the expression console: + +``` +prometheus_target_interval_length_seconds +``` + +This should return a lot of different time series (along with the latest value +recorded for each), all with the metric name +`prometheus_target_interval_length_seconds`, but with different labels. These +labels designate different latency percentiles and target group intervals. + +If we were only interested in the 99th percentile latencies, we could use this +query to retrieve that information: + +``` +prometheus_target_interval_length_seconds{quantile="0.99"} +``` + +To count the number of returned time series, you could write: + +``` +count(prometheus_target_interval_length_seconds) +``` + +For more about the expression language, see the +[expression language documentation](querying/basics.md). + +## Using the graphing interface + +To graph expressions, navigate to http://localhost:9090/graph and use the "Graph" +tab. 
+ +For example, enter the following expression to graph the per-second rate of all +storage chunk operations happening in the self-scraped Prometheus: + +``` +rate(prometheus_local_storage_chunk_ops_total[1m]) +``` + +Experiment with the graph range parameters and other settings. + +## Starting up some sample targets + +Let us make this more interesting and start some example targets for Prometheus +to scrape. + +The Go client library includes an example which exports fictional RPC latencies +for three services with different latency distributions. + +Ensure you have the [Go compiler installed](https://golang.org/doc/install) and +have a [working Go build environment](https://golang.org/doc/code.html) (with +correct `GOPATH`) set up. + +Download the Go client library for Prometheus and run three of these example +processes: + +```bash +# Fetch the client library code and compile example. +git clone https://github.com/prometheus/client_golang.git +cd client_golang/examples/random +go get -d +go build + +# Start 3 example targets in separate terminals: +./random -listen-address=:8080 +./random -listen-address=:8081 +./random -listen-address=:8082 +``` + +You should now have example targets listening on http://localhost:8080/metrics, +http://localhost:8081/metrics, and http://localhost:8082/metrics. + +## Configuring Prometheus to monitor the sample targets + +Now we will configure Prometheus to scrape these new targets. Let's group all +three endpoints into one job called `example-random`. However, imagine that the +first two endpoints are production targets, while the third one represents a +canary instance. To model this in Prometheus, we can add several groups of +endpoints to a single job, adding extra labels to each group of targets. In +this example, we will add the `group="production"` label to the first group of +targets, while adding `group="canary"` to the second. + +To achieve this, add the following job definition to the `scrape_configs` +section in your `prometheus.yml` and restart your Prometheus instance: + +```yaml +scrape_configs: + - job_name: 'example-random' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:8080', 'localhost:8081'] + labels: + group: 'production' + + - targets: ['localhost:8082'] + labels: + group: 'canary' +``` + +Go to the expression browser and verify that Prometheus now has information +about time series that these example endpoints expose, such as the +`rpc_durations_seconds` metric. + +## Configure rules for aggregating scraped data into new time series + +Though not a problem in our example, queries that aggregate over thousands of +time series can get slow when computed ad-hoc. To make this more efficient, +Prometheus allows you to prerecord expressions into completely new persisted +time series via configured recording rules. Let's say we are interested in +recording the per-second rate of example RPCs +(`rpc_durations_seconds_count`) averaged over all instances (but +preserving the `job` and `service` dimensions) as measured over a window of 5 +minutes. We could write this as: + +``` +avg(rate(rpc_durations_seconds_count[5m])) by (job, service) +``` + +Try graphing this expression. 
+ +To record the time series resulting from this expression into a new metric +called `job_service:rpc_durations_seconds_count:avg_rate5m`, create a file +with the following recording rule and save it as `prometheus.rules`: + +``` +job_service:rpc_durations_seconds_count:avg_rate5m = avg(rate(rpc_durations_seconds_count[5m])) by (job, service) +``` + +To make Prometheus pick up this new rule, add a `rule_files` statement to the +global configuration section in your `prometheus.yml`. The config should now +look like this: + +```yaml +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # Evaluate rules every 15 seconds. + + # Attach these extra labels to all timeseries collected by this Prometheus instance. + external_labels: + monitor: 'codelab-monitor' + +rule_files: + - 'prometheus.rules' + +scrape_configs: + - job_name: 'prometheus' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'example-random' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:8080', 'localhost:8081'] + labels: + group: 'production' + + - targets: ['localhost:8082'] + labels: + group: 'canary' +``` + +Restart Prometheus with the new configuration and verify that a new time series +with the metric name `job_service:rpc_durations_seconds_count:avg_rate5m` +is now available by querying it through the expression browser or graphing it. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..8f4e3aabc6 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,16 @@ +--- +# todo: internal +--- + +# Prometheus 1.8 + +Welcome to the documentation of the Prometheus server. + +The documentation is available alongside all the project documentation at +[prometheus.io](https://prometheus.io/docs/prometheus/1.8/). + +## Content + +- [Installing](install.md) +- [Getting started](getting_started.md) +- [Configuration](configuration.md) diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000000..186de8aaf9 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,96 @@ +--- +title: Installing +--- + +# Installing + +## Using pre-compiled binaries + +We provide precompiled binaries for most official Prometheus components. Check +out the [download section](https://prometheus.io/download) for a list of all +available versions. + +## From source + +For building Prometheus components from source, see the `Makefile` targets in +the respective repository. + +## Using Docker + +All Prometheus services are available as Docker images under the +[prom](https://hub.docker.com/u/prom/) organization. + +Running Prometheus on Docker is as simple as `docker run -p 9090:9090 +prom/prometheus`. This starts Prometheus with a sample configuration and +exposes it on port 9090. + +The Prometheus image uses a volume to store the actual metrics. For +production deployments it is highly recommended to use the +[Data Volume Container](https://docs.docker.com/engine/userguide/containers/dockervolumes/#creating-and-mounting-a-data-volume-container) +pattern to ease managing the data on Prometheus upgrades. + +To provide your own configuration, there are several options. Here are +two examples. 
+ +### Volumes & bind-mount + +Bind-mount your prometheus.yml from the host by running: + +``` +docker run -p 9090:9090 -v /tmp/prometheus.yml:/etc/prometheus/prometheus.yml \ + prom/prometheus +``` + +Or use an additional volume for the config: + +``` +docker run -p 9090:9090 -v /prometheus-data \ + prom/prometheus -config.file=/prometheus-data/prometheus.yml +``` + +### Custom image + +To avoid managing a file on the host and bind-mount it, the +configuration can be baked into the image. This works well if the +configuration itself is rather static and the same across all +environments. + +For this, create a new directory with a Prometheus configuration and a +Dockerfile like this: + +``` +FROM prom/prometheus +ADD prometheus.yml /etc/prometheus/ +``` + +Now build and run it: + +``` +docker build -t my-prometheus . +docker run -p 9090:9090 my-prometheus +``` + +A more advanced option is to render the config dynamically on start +with some tooling or even have a daemon update it periodically. + +## Using configuration management systems + +If you prefer using configuration management systems you might be interested in +the following third-party contributions: + +Ansible: + +* [griggheo/ansible-prometheus](https://github.com/griggheo/ansible-prometheus) +* [William-Yeh/ansible-prometheus](https://github.com/William-Yeh/ansible-prometheus) + +Chef: + +* [rayrod2030/chef-prometheus](https://github.com/rayrod2030/chef-prometheus) + +Puppet: + +* [puppet/prometheus](https://forge.puppet.com/puppet/prometheus) + +SaltStack: + +* [bechtoldt/saltstack-prometheus-formula](https://github.com/bechtoldt/saltstack-prometheus-formula) From 41281aff8133e2e6338c1903e1437e7fbb0ee265 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Tue, 10 Oct 2017 19:55:29 +0200 Subject: [PATCH 06/11] Include 1.8 changes in configuration docs --- docs/configuration.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 67f214a02c..b0bf8ef8b3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -296,6 +296,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_consul_address`: the address of the target * `__meta_consul_dc`: the datacenter name for the target +* `__meta_consul_metadata_`: each metadata key value of the target * `__meta_consul_node`: the node name defined for the target * `__meta_consul_service_address`: the service address of the target * `__meta_consul_service_id`: the service ID of the target @@ -394,6 +395,9 @@ region: # Named AWS profile used to connect to the API. [ profile: ] +# AWS Role ARN, an alternative to using AWS API keys. +[ role_arn: ] + # Refresh interval to re-read the instance list. [ refresh_interval: | default = 60s ] @@ -655,6 +659,22 @@ Available meta labels: * If the endpoints belong to a service, all labels of the `role: service` discovery are attached. * For all targets backed by a pod, all labels of the `role: pod` discovery are attached. +#### `ingress` + +The `ingress` role discovers a target for each path of each ingress. +This is generally useful for blackbox monitoring of an ingress. +The address will be set to the host specified in the ingress spec. + +Available meta labels: + +* `__meta_kubernetes_namespace`: The namespace of the ingress object. +* `__meta_kubernetes_ingress_name`: The name of the ingress object. +* `__meta_kubernetes_ingress_label_`: The label of the ingress object. 
+* `__meta_kubernetes_ingress_annotation_`: The annotation of the ingress object. +* `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS + config is set. Defaults to `http`. +* `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`. + See below for the configuration options for Kubernetes discovery: ```yaml @@ -719,6 +739,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_marathon_app_label_`: any Marathon labels attached to the app * `__meta_marathon_port_definition_label_`: the port definition labels * `__meta_marathon_port_mapping_label_`: the port mapping labels +* `__meta_marathon_port_index`: the port index number (e.g. `1` for `PORT1`) See below for the configuration options for Marathon discovery: From 299802dfd03a9a2b28275cb9029c3b1b762e1179 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Thu, 26 Oct 2017 15:42:07 +0200 Subject: [PATCH 07/11] Integrate changes from prometheus/docs --- docs/configuration.md | 15 ++++++++------- docs/getting_started.md | 27 +++++++++++++-------------- docs/installation.md | 29 +++++++++++++++++------------ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index b0bf8ef8b3..0fcca8578a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,6 +1,5 @@ --- title: Configuration -sort_rank: 20 --- # Configuration @@ -549,6 +548,8 @@ project: zone: # Filter can be used optionally to filter the instance list by other criteria +# Syntax of this filter string is described here in the filter query parameter section: +# https://cloud.google.com/compute/docs/reference/latest/instances/list [ filter: ] # Refresh interval to re-read the instance list @@ -770,8 +771,8 @@ in the configuration file), which can also be changed using relabeling. ### `` -Nerve SD configurations allow retrieving scrape targets from [AirBnB's -Nerve](https://github.com/airbnb/nerve) which are stored in +Nerve SD configurations allow retrieving scrape targets from [AirBnB's Nerve] +(https://github.com/airbnb/nerve) which are stored in [Zookeeper](https://zookeeper.apache.org/). The following meta labels are available on targets during [relabeling](#relabel_config): @@ -793,10 +794,10 @@ paths: ### `` -Serverset SD configurations allow retrieving scrape targets from -[Serversets](https://github.com/twitter/finagle/tree/master/finagle-serversets) -which are stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are -commonly used by [Finagle](https://twitter.github.io/finagle/) and +Serverset SD configurations allow retrieving scrape targets from [Serversets] +(https://github.com/twitter/finagle/tree/master/finagle-serversets) which are +stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly +used by [Finagle](https://twitter.github.io/finagle/) and [Aurora](http://aurora.apache.org/). The following meta labels are available on targets during relabeling: diff --git a/docs/getting_started.md b/docs/getting_started.md index 8585e25664..112b4b1b7a 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -60,8 +60,8 @@ For a complete specification of configuration options, see the ## Starting Prometheus -To start Prometheus with your newly created configuration file, change to your -Prometheus build directory and run: +To start Prometheus with your newly created configuration file, change to the +directory containing the Prometheus binary and run: ```bash # Start Prometheus. 
@@ -69,9 +69,9 @@ Prometheus build directory and run: ./prometheus -config.file=prometheus.yml ``` -Prometheus should start up and it should show a status page about itself at -[localhost:9090](http://localhost:9090). Give it a couple of seconds to collect -data about itself from its own HTTP metrics endpoint. +Prometheus should start up. You should also be able to browse to a status page +about itself at [localhost:9090](http://localhost:9090). Give it a couple of +seconds to collect data about itself from its own HTTP metrics endpoint. You can also verify that Prometheus is serving metrics about itself by navigating to its metrics endpoint: @@ -81,11 +81,10 @@ The number of OS threads executed by Prometheus is controlled by the `GOMAXPROCS` environment variable. As of Go 1.5 the default value is the number of cores available. -Blindly setting `GOMAXPROCS` to a high value can be -counterproductive. See the relevant [Go -FAQs](http://golang.org/doc/faq#Why_no_multi_CPU). +Blindly setting `GOMAXPROCS` to a high value can be counterproductive. See the +relevant [Go FAQs](http://golang.org/doc/faq#Why_no_multi_CPU). -Note that Prometheus by default uses around 3GB in memory. If you have a +Prometheus by default uses around 3GB in memory. If you have a smaller machine, you can tune Prometheus to use less memory. For details, see the [memory usage documentation](storage.md#memory-usage). @@ -96,8 +95,8 @@ use Prometheus's built-in expression browser, navigate to http://localhost:9090/graph and choose the "Console" view within the "Graph" tab. -As you can gather from http://localhost:9090/metrics, one metric that -Prometheus exports about itself is called +As you can gather from [localhost:9090/metrics](http://localhost:9090/metrics), +one metric that Prometheus exports about itself is called `prometheus_target_interval_length_seconds` (the actual amount of time between target scrapes). Go ahead and enter this into the expression console: @@ -105,7 +104,7 @@ target scrapes). Go ahead and enter this into the expression console: prometheus_target_interval_length_seconds ``` -This should return a lot of different time series (along with the latest value +This should return a number of different time series (along with the latest value recorded for each), all with the metric name `prometheus_target_interval_length_seconds`, but with different labels. These labels designate different latency percentiles and target group intervals. @@ -186,7 +185,7 @@ section in your `prometheus.yml` and restart your Prometheus instance: ```yaml scrape_configs: - - job_name: 'example-random' + - job_name: 'example-random' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s @@ -231,7 +230,7 @@ job_service:rpc_durations_seconds_count:avg_rate5m = avg(rate(rpc_durations_seco ``` To make Prometheus pick up this new rule, add a `rule_files` statement to the -global configuration section in your `prometheus.yml`. The config should now +`global` configuration section in your `prometheus.yml`. The config should now look like this: ```yaml diff --git a/docs/installation.md b/docs/installation.md index 186de8aaf9..1f7648cf97 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -15,6 +15,11 @@ available versions. For building Prometheus components from source, see the `Makefile` targets in the respective repository. +NOTE: **Note:** The documentation on this website refers to the latest stable +release (excluding pre-releases). 
The branch +[next-release](https://github.com/prometheus/docs/compare/next-release) refers +to unreleased changes that are in master branches of source repos. + ## Using Docker All Prometheus services are available as Docker images under the @@ -26,7 +31,7 @@ exposes it on port 9090. The Prometheus image uses a volume to store the actual metrics. For production deployments it is highly recommended to use the -[Data Volume Container](https://docs.docker.com/engine/userguide/containers/dockervolumes/#creating-and-mounting-a-data-volume-container) +[Data Volume Container](https://docs.docker.com/engine/admin/volumes/volumes/) pattern to ease managing the data on Prometheus upgrades. To provide your own configuration, there are several options. Here are @@ -34,16 +39,16 @@ two examples. ### Volumes & bind-mount -Bind-mount your prometheus.yml from the host by running: +Bind-mount your `prometheus.yml` from the host by running: -``` +```bash docker run -p 9090:9090 -v /tmp/prometheus.yml:/etc/prometheus/prometheus.yml \ prom/prometheus ``` Or use an additional volume for the config: -``` +```bash docker run -p 9090:9090 -v /prometheus-data \ prom/prometheus -config.file=/prometheus-data/prometheus.yml ``` @@ -56,21 +61,21 @@ configuration itself is rather static and the same across all environments. For this, create a new directory with a Prometheus configuration and a -Dockerfile like this: +`Dockerfile` like this: -``` +```Dockerfile FROM prom/prometheus ADD prometheus.yml /etc/prometheus/ ``` Now build and run it: -``` +```bash docker build -t my-prometheus . docker run -p 9090:9090 my-prometheus ``` -A more advanced option is to render the config dynamically on start +A more advanced option is to render the configuration dynamically on start with some tooling or even have a daemon update it periodically. ## Using configuration management systems @@ -78,19 +83,19 @@ with some tooling or even have a daemon update it periodically. 
If you prefer using configuration management systems you might be interested in the following third-party contributions: -Ansible: +### Ansible * [griggheo/ansible-prometheus](https://github.com/griggheo/ansible-prometheus) * [William-Yeh/ansible-prometheus](https://github.com/William-Yeh/ansible-prometheus) -Chef: +### Chef * [rayrod2030/chef-prometheus](https://github.com/rayrod2030/chef-prometheus) -Puppet: +### Puppet * [puppet/prometheus](https://forge.puppet.com/puppet/prometheus) -SaltStack: +### SaltStack * [bechtoldt/saltstack-prometheus-formula](https://github.com/bechtoldt/saltstack-prometheus-formula) From e6cdc2d35570a0890efe026b2cad2c0d99a335cc Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Thu, 26 Oct 2017 15:53:27 +0200 Subject: [PATCH 08/11] Import querying documentation from prometheus/docs --- docs/configuration.md | 1 + docs/getting_started.md | 2 +- docs/index.md | 1 + docs/installation.md | 5 +- docs/querying/api.md | 417 +++++++++++++++++++++++++++++++++++++ docs/querying/basics.md | 215 +++++++++++++++++++ docs/querying/examples.md | 83 ++++++++ docs/querying/functions.md | 408 ++++++++++++++++++++++++++++++++++++ docs/querying/index.md | 4 + docs/querying/operators.md | 250 ++++++++++++++++++++++ docs/querying/rules.md | 66 ++++++ 11 files changed, 1449 insertions(+), 3 deletions(-) create mode 100644 docs/querying/api.md create mode 100644 docs/querying/basics.md create mode 100644 docs/querying/examples.md create mode 100644 docs/querying/functions.md create mode 100644 docs/querying/index.md create mode 100644 docs/querying/operators.md create mode 100644 docs/querying/rules.md diff --git a/docs/configuration.md b/docs/configuration.md index 0fcca8578a..4efd392c77 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,5 +1,6 @@ --- title: Configuration +sort_rank: 3 --- # Configuration diff --git a/docs/getting_started.md b/docs/getting_started.md index 112b4b1b7a..a2518bd43e 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,6 +1,6 @@ --- title: Getting started -sort_rank: 10 +sort_rank: 1 --- # Getting started diff --git a/docs/index.md b/docs/index.md index 8f4e3aabc6..8641cd1b07 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,3 +14,4 @@ The documentation is available alongside all the project documentation at - [Installing](install.md) - [Getting started](getting_started.md) - [Configuration](configuration.md) +- [Querying](querying/basics.md) diff --git a/docs/installation.md b/docs/installation.md index 1f7648cf97..4d00edea6b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,8 +1,9 @@ --- -title: Installing +title: Installation +sort_rank: 2 --- -# Installing +# Installation ## Using pre-compiled binaries diff --git a/docs/querying/api.md b/docs/querying/api.md new file mode 100644 index 0000000000..c23677a5a9 --- /dev/null +++ b/docs/querying/api.md @@ -0,0 +1,417 @@ +--- +title: HTTP API +sort_rank: 7 +--- + +# HTTP API + +The current stable HTTP API is reachable under `/api/v1` on a Prometheus +server. Any non-breaking additions will be added under that endpoint. + +## Format overview + +The API response format is JSON. Every successful API request returns a `2xx` +status code. + +Invalid requests that reach the API handlers return a JSON error object +and one of the following HTTP response codes: + +- `400 Bad Request` when parameters are missing or incorrect. +- `422 Unprocessable Entity` when an expression can't be executed + ([RFC4918](http://tools.ietf.org/html/rfc4918#page-78)). 
+- `503 Service Unavailable` when queries time out or abort. + +Other non-`2xx` codes may be returned for errors occurring before the API +endpoint is reached. + +The JSON response envelope format is as follows: + +``` +{ + "status": "success" | "error", + "data": , + + // Only set if status is "error". The data field may still hold + // additional data. + "errorType": "", + "error": "" +} +``` + +Input timestamps may be provided either in +[RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format or as a Unix timestamp +in seconds, with optional decimal places for sub-second precision. Output +timestamps are always represented as Unix timestamps in seconds. + +Names of query parameters that may be repeated end with `[]`. + +`` placeholders refer to Prometheus [time series +selectors](basics.md#time-series-selectors) like `http_requests_total` or +`http_requests_total{method=~"^GET|POST$"}` and need to be URL-encoded. + +`` placeholders refer to Prometheus duration strings of the form +`[0-9]+[smhdwy]`. For example, `5m` refers to a duration of 5 minutes. + +## Expression queries + +Query language expressions may be evaluated at a single instant or over a range +of time. The sections below describe the API endpoints for each type of +expression query. + +### Instant queries + +The following endpoint evaluates an instant query at a single point in time: + +``` +GET /api/v1/query +``` + +URL query parameters: + +- `query=`: Prometheus expression query string. +- `time=`: Evaluation timestamp. Optional. +- `timeout=`: Evaluation timeout. Optional. Defaults to and + is capped by the value of the `-query.timeout` flag. + +The current server time is used if the `time` parameter is omitted. + +The `data` section of the query result has the following format: + +``` +{ + "resultType": "matrix" | "vector" | "scalar" | "string", + "result": +} +``` + +`` refers to the query result data, which has varying formats +depending on the `resultType`. See the [expression query result +formats](#expression-query-result-formats). + +The following example evaluates the expression `up` at the time +`2015-07-01T20:10:51.781Z`: + +```json +$ curl 'http://localhost:9090/api/v1/query?query=up&time=2015-07-01T20:10:51.781Z' +{ + "status" : "success", + "data" : { + "resultType" : "vector", + "result" : [ + { + "metric" : { + "__name__" : "up", + "job" : "prometheus", + "instance" : "localhost:9090" + }, + "value": [ 1435781451.781, "1" ] + }, + { + "metric" : { + "__name__" : "up", + "job" : "node", + "instance" : "localhost:9100" + }, + "value" : [ 1435781451.781, "0" ] + } + ] + } +} +``` + +### Range queries + +The following endpoint evaluates an expression query over a range of time: + +``` +GET /api/v1/query_range +``` + +URL query parameters: + +- `query=`: Prometheus expression query string. +- `start=`: Start timestamp. +- `end=`: End timestamp. +- `step=`: Query resolution step width. +- `timeout=`: Evaluation timeout. Optional. Defaults to and + is capped by the value of the `-query.timeout` flag. + +The `data` section of the query result has the following format: + +``` +{ + "resultType": "matrix", + "result": +} +``` + +For the format of the `` placeholder, see the [range-vector result +format](#range-vectors). + +The following example evaluates the expression `up` over a 30-second range with +a query resolution of 15 seconds. 
+ +```json +$ curl 'http://localhost:9090/api/v1/query_range?query=up&start=2015-07-01T20:10:30.781Z&end=2015-07-01T20:11:00.781Z&step=15s' +{ + "status" : "success", + "data" : { + "resultType" : "matrix", + "result" : [ + { + "metric" : { + "__name__" : "up", + "job" : "prometheus", + "instance" : "localhost:9090" + }, + "values" : [ + [ 1435781430.781, "1" ], + [ 1435781445.781, "1" ], + [ 1435781460.781, "1" ] + ] + }, + { + "metric" : { + "__name__" : "up", + "job" : "node", + "instance" : "localhost:9091" + }, + "values" : [ + [ 1435781430.781, "0" ], + [ 1435781445.781, "0" ], + [ 1435781460.781, "1" ] + ] + } + ] + } +} +``` + +## Querying metadata + +### Finding series by label matchers + +The following endpoint returns the list of time series that match a certain label set. + +``` +GET /api/v1/series +``` + +URL query parameters: + +- `match[]=`: Repeated series selector argument that selects the + series to return. At least one `match[]` argument must be provided. +- `start=`: Start timestamp. +- `end=`: End timestamp. + +The `data` section of the query result consists of a list of objects that +contain the label name/value pairs which identify each series. + +The following example returns all series that match either of the selectors +`up` or `process_start_time_seconds{job="prometheus"}`: + +```json +$ curl -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}' +{ + "status" : "success", + "data" : [ + { + "__name__" : "up", + "job" : "prometheus", + "instance" : "localhost:9090" + }, + { + "__name__" : "up", + "job" : "node", + "instance" : "localhost:9091" + }, + { + "__name__" : "process_start_time_seconds", + "job" : "prometheus", + "instance" : "localhost:9090" + } + ] +} +``` + +### Querying label values + +The following endpoint returns a list of label values for a provided label name: + +``` +GET /api/v1/label//values +``` + +The `data` section of the JSON response is a list of string label names. + +This example queries for all label values for the `job` label: + +```json +$ curl http://localhost:9090/api/v1/label/job/values +{ + "status" : "success", + "data" : [ + "node", + "prometheus" + ] +} +``` + +## Deleting series + +The following endpoint deletes matched series entirely from a Prometheus server: + +``` +DELETE /api/v1/series +``` + +URL query parameters: + +- `match[]=`: Repeated label matcher argument that selects the + series to delete. At least one `match[]` argument must be provided. + +The `data` section of the JSON response has the following format: + +``` +{ + "numDeleted": +} +``` + +The following example deletes all series that match either of the selectors +`up` or `process_start_time_seconds{job="prometheus"}`: + +```json +$ curl -XDELETE -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}' +{ + "status" : "success", + "data" : { + "numDeleted" : 3 + } +} +``` + +## Expression query result formats + +Expression queries may return the following response values in the `result` +property of the `data` section. `` placeholders are numeric +sample values. JSON does not support special float values such as `NaN`, `Inf`, +and `-Inf`, so sample values are transferred as quoted JSON strings rather than +raw numbers. + +### Range vectors + +Range vectors are returned as result type `matrix`. The corresponding +`result` property has the following format: + +``` +[ + { + "metric": { "": "", ... }, + "values": [ [ , "" ], ... ] + }, + ... 
+]
+```
+
+### Instant vectors
+
+Instant vectors are returned as result type `vector`. The corresponding
+`result` property has the following format:
+
+```
+[
+  {
+    "metric": { "<label_name>": "<label_value>", ... },
+    "value": [ <unix_time>, "<sample_value>" ]
+  },
+  ...
+]
+```
+
+### Scalars
+
+Scalar results are returned as result type `scalar`. The corresponding
+`result` property has the following format:
+
+```
+[ <unix_time>, "<scalar_value>" ]
+```
+
+### Strings
+
+String results are returned as result type `string`. The corresponding
+`result` property has the following format:
+
+```
+[ <unix_time>, "<string_value>" ]
+```
+
+## Targets
+
+> This API is experimental as it is intended to be extended with targets
+> dropped due to relabelling in the future.
+
+The following endpoint returns an overview of the current state of the
+Prometheus target discovery:
+
+```
+GET /api/v1/targets
+```
+
+Currently only the active targets are part of the response.
+
+```json
+$ curl http://localhost:9090/api/v1/targets
+{
+  "status": "success",
+  "data": {
+    "activeTargets": [
+      {
+        "discoveredLabels": {
+          "__address__": "127.0.0.1:9090",
+          "__metrics_path__": "/metrics",
+          "__scheme__": "http",
+          "job": "prometheus"
+        },
+        "labels": {
+          "instance": "127.0.0.1:9090",
+          "job": "prometheus"
+        },
+        "scrapeUrl": "http://127.0.0.1:9090/metrics",
+        "lastError": "",
+        "lastScrape": "2017-01-17T15:07:44.723715405+01:00",
+        "health": "up"
+      }
+    ]
+  }
+}
+```
+
+## Alertmanagers
+
+> This API is experimental as it is intended to be extended with Alertmanagers
+> dropped due to relabelling in the future.
+
+The following endpoint returns an overview of the current state of the
+Prometheus alertmanager discovery:
+
+```
+GET /api/v1/alertmanagers
+```
+
+Currently only the active Alertmanagers are part of the response.
+
+```json
+$ curl http://localhost:9090/api/v1/alertmanagers
+{
+  "status": "success",
+  "data": {
+    "activeAlertmanagers": [
+      {
+        "url": "http://127.0.0.1:9090/api/v1/alerts"
+      }
+    ]
+  }
+}
+```
diff --git a/docs/querying/basics.md b/docs/querying/basics.md
new file mode 100644
index 0000000000..f001c6d0d1
--- /dev/null
+++ b/docs/querying/basics.md
@@ -0,0 +1,215 @@
+---
+title: Querying basics
+nav_title: Basics
+sort_rank: 1
+---
+
+# Querying Prometheus
+
+Prometheus provides a functional expression language that lets the user select
+and aggregate time series data in real time. The result of an expression can
+either be shown as a graph, viewed as tabular data in Prometheus's expression
+browser, or consumed by external systems via the [HTTP API](api.md).
+
+## Examples
+
+This document is meant as a reference. For learning, it might be easier to
+start with a couple of [examples](examples.md).
+
+## Expression language data types
+
+In Prometheus's expression language, an expression or sub-expression can
+evaluate to one of four types:
+
+* **Instant vector** - a set of time series containing a single sample for each time series, all sharing the same timestamp
+* **Range vector** - a set of time series containing a range of data points over time for each time series
+* **Scalar** - a simple numeric floating point value
+* **String** - a simple string value; currently unused
+
+Depending on the use-case (e.g. when graphing vs. displaying the output of an
+expression), only some of these types are legal as the result from a
+user-specified expression. For example, an expression that returns an instant
+vector is the only type that can be directly graphed.
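+
+To make the distinction concrete, here is a small, purely illustrative set of
+expressions (reusing the `http_requests_total` example metric that appears
+later in this document) together with the type each one evaluates to:
+
+    http_requests_total          # instant vector: one sample per matching series
+    http_requests_total[5m]      # range vector: the last 5 minutes of samples per series
+    count(http_requests_total)   # instant vector produced by an aggregation
+    42.5                         # scalar
+
+A range vector cannot be graphed directly; it is usually passed to a function
+such as `rate()` first.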
+ +## Literals + +### String literals + +Strings may be specified as literals in single quotes, double quotes or +backticks. + +PromQL follows the same [escaping rules as +Go](https://golang.org/ref/spec#String_literals). In single or double quotes a +backslash begins an escape sequence, which may be followed by `a`, `b`, `f`, +`n`, `r`, `t`, `v` or `\`. Specific characters can be provided using octal +(`\nnn`) or hexadecimal (`\xnn`, `\unnnn` and `\Unnnnnnnn`). + +No escaping is processed inside backticks. Unlike Go, Prometheus does not discard newlines inside backticks. + +Example: + + "this is a string" + 'these are unescaped: \n \\ \t' + `these are not unescaped: \n ' " \t` + +### Float literals + +Scalar float values can be literally written as numbers of the form +`[-](digits)[.(digits)]`. + + -2.43 + +## Time series Selectors + +### Instant vector selectors + +Instant vector selectors allow the selection of a set of time series and a +single sample value for each at a given timestamp (instant): in the simplest +form, only a metric name is specified. This results in an instant vector +containing elements for all time series that have this metric name. + +This example selects all time series that have the `http_requests_total` metric +name: + + http_requests_total + +It is possible to filter these time series further by appending a set of labels +to match in curly braces (`{}`). + +This example selects only those time series with the `http_requests_total` +metric name that also have the `job` label set to `prometheus` and their +`group` label set to `canary`: + + http_requests_total{job="prometheus",group="canary"} + +It is also possible to negatively match a label value, or to match label values +against regular expressions. The following label matching operators exist: + +* `=`: Select labels that are exactly equal to the provided string. +* `!=`: Select labels that are not equal to the provided string. +* `=~`: Select labels that regex-match the provided string (or substring). +* `!~`: Select labels that do not regex-match the provided string (or substring). + +For example, this selects all `http_requests_total` time series for `staging`, +`testing`, and `development` environments and HTTP methods other than `GET`. + + http_requests_total{environment=~"staging|testing|development",method!="GET"} + +Label matchers that match empty label values also select all time series that do +not have the specific label set at all. Regex-matches are fully anchored. + +Vector selectors must either specify a name or at least one label matcher +that does not match the empty string. The following expression is illegal: + + {job=~".*"} # Bad! + +In contrast, these expressions are valid as they both have a selector that does not +match empty label values. + + {job=~".+"} # Good! + {job=~".*",method="get"} # Good! + +Label matchers can also be applied to metric names by matching against the internal +`__name__` label. For example, the expression `http_requests_total` is equivalent to +`{__name__="http_requests_total"}`. Matchers other than `=` (`!=`, `=~`, `!~`) may also be used. +The following expression selects all metrics that have a name starting with `job:`: + + {__name__=~"^job:.*"} + +### Range Vector Selectors + +Range vector literals work like instant vector literals, except that they +select a range of samples back from the current instant. 
Syntactically, a range +duration is appended in square brackets (`[]`) at the end of a vector selector +to specify how far back in time values should be fetched for each resulting +range vector element. + +Time durations are specified as a number, followed immediately by one of the +following units: + +* `s` - seconds +* `m` - minutes +* `h` - hours +* `d` - days +* `w` - weeks +* `y` - years + +In this example, we select all the values we have recorded within the last 5 +minutes for all time series that have the metric name `http_requests_total` and +a `job` label set to `prometheus`: + + http_requests_total{job="prometheus"}[5m] + +### Offset modifier + +The `offset` modifier allows changing the time offset for individual +instant and range vectors in a query. + +For example, the following expression returns the value of +`http_requests_total` 5 minutes in the past relative to the current +query evaluation time: + + http_requests_total offset 5m + +Note that the `offset` modifier always needs to follow the selector +immediately, i.e. the following would be correct: + + sum(http_requests_total{method="GET"} offset 5m) // GOOD. + +While the following would be *incorrect*: + + sum(http_requests_total{method="GET"}) offset 5m // INVALID. + +The same works for range vectors. This returns the 5-minutes rate that +`http_requests_total` had a week ago: + + rate(http_requests_total[5m] offset 1w) + +## Operators + +Prometheus supports many binary and aggregation operators. These are described +in detail in the [expression language operators](operators.md) page. + +## Functions + +Prometheus supports several functions to operate on data. These are described +in detail in the [expression language functions](functions.md) page. + +## Gotchas + +### Interpolation and staleness + +When queries are run, timestamps at which to sample data are selected +independently of the actual present time series data. This is mainly to support +cases like aggregation (`sum`, `avg`, and so on), where multiple aggregated +time series do not exactly align in time. Because of their independence, +Prometheus needs to assign a value at those timestamps for each relevant time +series. It does so by simply taking the newest sample before this timestamp. + +If no stored sample is found (by default) 5 minutes before a sampling timestamp, +no value is assigned for this time series at this point in time. This +effectively means that time series "disappear" from graphs at times where their +latest collected sample is older than 5 minutes. + +NOTE: NOTE: Staleness and interpolation handling might change. See +https://github.com/prometheus/prometheus/issues/398 and +https://github.com/prometheus/prometheus/issues/581. + +### Avoiding slow queries and overloads + +If a query needs to operate on a very large amount of data, graphing it might +time out or overload the server or browser. Thus, when constructing queries +over unknown data, always start building the query in the tabular view of +Prometheus's expression browser until the result set seems reasonable +(hundreds, not thousands, of time series at most). Only when you have filtered +or aggregated your data sufficiently, switch to graph mode. If the expression +still takes too long to graph ad-hoc, pre-record it via a [recording +rule](rules.md#recording-rules). + +This is especially relevant for Prometheus's query language, where a bare +metric name selector like `api_http_requests_total` could expand to thousands +of time series with different labels. 
Also keep in mind that expressions which +aggregate over many time series will generate load on the server even if the +output is only a small number of time series. This is similar to how it would +be slow to sum all values of a column in a relational database, even if the +output value is only a single number. diff --git a/docs/querying/examples.md b/docs/querying/examples.md new file mode 100644 index 0000000000..4e522ab85d --- /dev/null +++ b/docs/querying/examples.md @@ -0,0 +1,83 @@ +--- +title: Querying examples +nav_title: Examples +sort_rank: 4 +--- + +# Query examples + +## Simple time series selection + +Return all time series with the metric `http_requests_total`: + + http_requests_total + +Return all time series with the metric `http_requests_total` and the given +`job` and `handler` labels: + + http_requests_total{job="apiserver", handler="/api/comments"} + +Return a whole range of time (in this case 5 minutes) for the same vector, +making it a range vector: + + http_requests_total{job="apiserver", handler="/api/comments"}[5m] + +Note that an expression resulting in a range vector cannot be graphed directly, +but viewed in the tabular ("Console") view of the expression browser. + +Using regular expressions, you could select time series only for jobs whose +name match a certain pattern, in this case, all jobs that end with `server`. +Note that this does a substring match, not a full string match: + + http_requests_total{job=~"server$"} + +To select all HTTP status codes except 4xx ones, you could run: + + http_requests_total{status!~"^4..$"} + +## Using functions, operators, etc. + +Return the per-second rate for all time series with the `http_requests_total` +metric name, as measured over the last 5 minutes: + + rate(http_requests_total[5m]) + +Assuming that the `http_requests_total` time series all have the labels `job` +(fanout by job name) and `instance` (fanout by instance of the job), we might +want to sum over the rate of all instances, so we get fewer output time series, +but still preserve the `job` dimension: + + sum(rate(http_requests_total[5m])) by (job) + +If we have two different metrics with the same dimensional labels, we can apply +binary operators to them and elements on both sides with the same label set +will get matched and propagated to the output. For example, this expression +returns the unused memory in MiB for every instance (on a fictional cluster +scheduler exposing these metrics about the instances it runs): + + (instance_memory_limit_bytes - instance_memory_usage_bytes) / 1024 / 1024 + +The same expression, but summed by application, could be written like this: + + sum( + instance_memory_limit_bytes - instance_memory_usage_bytes + ) by (app, proc) / 1024 / 1024 + +If the same fictional cluster scheduler exposed CPU usage metrics like the +following for every instance: + + instance_cpu_time_ns{app="lion", proc="web", rev="34d0f99", env="prod", job="cluster-manager"} + instance_cpu_time_ns{app="elephant", proc="worker", rev="34d0f99", env="prod", job="cluster-manager"} + instance_cpu_time_ns{app="turtle", proc="api", rev="4d3a513", env="prod", job="cluster-manager"} + instance_cpu_time_ns{app="fox", proc="widget", rev="4d3a513", env="prod", job="cluster-manager"} + ... 
+ +...we could get the top 3 CPU users grouped by application (`app`) and process +type (`proc`) like this: + + topk(3, sum(rate(instance_cpu_time_ns[5m])) by (app, proc)) + +Assuming this metric contains one time series per running instance, you could +count the number of running instances per application like this: + + count(instance_cpu_time_ns) by (app) diff --git a/docs/querying/functions.md b/docs/querying/functions.md new file mode 100644 index 0000000000..74e6740285 --- /dev/null +++ b/docs/querying/functions.md @@ -0,0 +1,408 @@ +--- +title: Query functions +nav_title: Functions +sort_rank: 3 +--- + +# Functions + +Some functions have default arguments, e.g. `year(v=vector(time()) +instant-vector)`. This means that there is one argument `v` which is an instant +vector, which if not provided it will default to the value of the expression +`vector(time())`. + +## `abs()` + +`abs(v instant-vector)` returns the input vector with all sample values converted to +their absolute value. + +## `absent()` + +`absent(v instant-vector)` returns an empty vector if the vector passed to it +has any elements and a 1-element vector with the value 1 if the vector passed to +it has no elements. + +This is useful for alerting on when no time series exist for a given metric name +and label combination. + +``` +absent(nonexistent{job="myjob"}) +# => {job="myjob"} + +absent(nonexistent{job="myjob",instance=~".*"}) +# => {job="myjob"} + +absent(sum(nonexistent{job="myjob"})) +# => {} +``` + +In the second example, `absent()` tries to be smart about deriving labels of the +1-element output vector from the input vector. + +## `ceil()` + +`ceil(v instant-vector)` rounds the sample values of all elements in `v` up to +the nearest integer. + +## `changes()` + +For each input time series, `changes(v range-vector)` returns the number of +times its value has changed within the provided time range as an instant +vector. + +## `clamp_max()` + +`clamp_max(v instant-vector, max scalar)` clamps the sample values of all +elements in `v` to have an upper limit of `max`. + +## `clamp_min()` + +`clamp_min(v instant-vector, min scalar)` clamps the sample values of all +elements in `v` to have a lower limit of `min`. + +## `count_scalar()` + +`count_scalar(v instant-vector)` returns the number of elements in a time series +vector as a scalar. This is in contrast to the `count()` +[aggregation operator](operators.md#aggregation-operators), which +always returns a vector (an empty one if the input vector is empty) and allows +grouping by labels via a `by` clause. + +## `day_of_month()` + +`day_of_month(v=vector(time()) instant-vector)` returns the day of the month +for each of the given times in UTC. Returned values are from 1 to 31. + +## `day_of_week()` + +`day_of_week(v=vector(time()) instant-vector)` returns the day of the week for +each of the given times in UTC. Returned values are from 0 to 6, where 0 means +Sunday etc. + +## `days_in_month()` + +`days_in_month(v=vector(time()) instant-vector)` returns number of days in the +month for each of the given times in UTC. Returned values are from 28 to 31. + +## `delta()` + +`delta(v range-vector)` calculates the difference between the +first and last value of each time series element in a range vector `v`, +returning an instant vector with the given deltas and equivalent labels. +The delta is extrapolated to cover the full time range as specified in +the range vector selector, so that it is possible to get a non-integer +result even if the sample values are all integers. 
+ +The following example expression returns the difference in CPU temperature +between now and 2 hours ago: + +``` +delta(cpu_temp_celsius{host="zeus"}[2h]) +``` + +`delta` should only be used with gauges. + +## `deriv()` + +`deriv(v range-vector)` calculates the per-second derivative of the time series in a range +vector `v`, using [simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression). + +`deriv` should only be used with gauges. + +## `drop_common_labels()` + +`drop_common_labels(instant-vector)` drops all labels that have the same name +and value across all series in the input vector. + +## `exp()` + +`exp(v instant-vector)` calculates the exponential function for all elements in `v`. +Special cases are: + +* `Exp(+Inf) = +Inf` +* `Exp(NaN) = NaN` + +## `floor()` + +`floor(v instant-vector)` rounds the sample values of all elements in `v` down +to the nearest integer. + +## `histogram_quantile()` + +`histogram_quantile(φ float, b instant-vector)` calculates the φ-quantile (0 ≤ φ +≤ 1) from the buckets `b` of a +[histogram](https://prometheus.io/docs/concepts/metric_types/#histogram). (See +[histograms and summaries](https://prometheus.io/docs/practices/histograms) for +a detailed explanation of φ-quantiles and the usage of the histogram metric type +in general.) The samples in `b` are the counts of observations in each bucket. +Each sample must have a label `le` where the label value denotes the inclusive +upper bound of the bucket. (Samples without such a label are silently ignored.) +The [histogram metric type](https://prometheus.io/docs/concepts/metric_types/#histogram) +automatically provides time series with the `_bucket` suffix and the appropriate +labels. + +Use the `rate()` function to specify the time window for the quantile +calculation. + +Example: A histogram metric is called `http_request_duration_seconds`. To +calculate the 90th percentile of request durations over the last 10m, use the +following expression: + + histogram_quantile(0.9, rate(http_request_duration_seconds_bucket[10m])) + +The quantile is calculated for each label combination in +`http_request_duration_seconds`. To aggregate, use the `sum()` aggregator +around the `rate()` function. Since the `le` label is required by +`histogram_quantile()`, it has to be included in the `by` clause. The following +expression aggregates the 90th percentile by `job`: + + histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[10m])) by (job, le)) + +To aggregate everything, specify only the `le` label: + + histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[10m])) by (le)) + +The `histogram_quantile()` function interpolates quantile values by +assuming a linear distribution within a bucket. The highest bucket +must have an upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If +a quantile is located in the highest bucket, the upper bound of the +second highest bucket is returned. A lower limit of the lowest bucket +is assumed to be 0 if the upper bound of that bucket is greater than +0. In that case, the usual linear interpolation is applied within that +bucket. Otherwise, the upper bound of the lowest bucket is returned +for quantiles located in the lowest bucket. + +If `b` contains fewer than two buckets, `NaN` is returned. For φ < 0, `-Inf` is +returned. For φ > 1, `+Inf` is returned. + +## `holt_winters()` + +`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value +for time series based on the range in `v`. 
The lower the smoothing factor `sf`, +the more importance is given to old data. The higher the trend factor `tf`, the +more trends in the data is considered. Both `sf` and `tf` must be between 0 and +1. + +`holt_winters` should only be used with gauges. + +## `hour()` + +`hour(v=vector(time()) instant-vector)` returns the hour of the day +for each of the given times in UTC. Returned values are from 0 to 23. + +## `idelta()` + +`idelta(v range-vector)` + +`idelta(v range-vector)` calculates the difference between the last two samples +in the range vector `v`, returning an instant vector with the given deltas and +equivalent labels. + +`idelta` should only be used with gauges. + +## `increase()` + +`increase(v range-vector)` calculates the increase in the +time series in the range vector. Breaks in monotonicity (such as counter +resets due to target restarts) are automatically adjusted for. The +increase is extrapolated to cover the full time range as specified +in the range vector selector, so that it is possible to get a +non-integer result even if a counter increases only by integer +increments. + +The following example expression returns the number of HTTP requests as measured +over the last 5 minutes, per time series in the range vector: + +``` +increase(http_requests_total{job="api-server"}[5m]) +``` + +`increase` should only be used with counters. It is syntactic sugar +for `rate(v)` multiplied by the number of seconds under the specified +time range window, and should be used primarily for human readability. +Use `rate` in recording rules so that increases are tracked consistently +on a per-second basis. + +## `irate()` + +`irate(v range-vector)` calculates the per-second instant rate of increase of +the time series in the range vector. This is based on the last two data points. +Breaks in monotonicity (such as counter resets due to target restarts) are +automatically adjusted for. + +The following example expression returns the per-second rate of HTTP requests +looking up to 5 minutes back for the two most recent data points, per time +series in the range vector: + +``` +irate(http_requests_total{job="api-server"}[5m]) +``` + +`irate` should only be used when graphing volatile, fast-moving counters. +Use `rate` for alerts and slow-moving counters, as brief changes +in the rate can reset the `FOR` clause and graphs consisting entirely of rare +spikes are hard to read. + +Note that when combining `irate()` with an +[aggregation operator](operators.md#aggregation-operators) (e.g. `sum()`) +or a function aggregating over time (any function ending in `_over_time`), +always take a `irate()` first, then aggregate. Otherwise `irate()` cannot detect +counter resets when your target restarts. + +## `label_join()` + +For each timeseries in `v`, `label_join(v instant-vector, dst_label string, separator string, src_label_1 string, src_label_2 string, ...)` joins all the values of all the `src_labels` +using `separator` and returns the timeseries with the label `dst_label` containing the joined value. +There can be any number of `src_labels` in this function. 
+ +This example will return a vector with each time series having a `foo` label with the value `a,b,c` added to it: + +``` +label_join(up{job="api-server",src1="a",src2="b",src3="c"}, "foo", ",", "src1", "src2", "src3") +``` + +## `label_replace()` + +For each timeseries in `v`, `label_replace(v instant-vector, dst_label string, +replacement string, src_label string, regex string)` matches the regular +expression `regex` against the label `src_label`. If it matches, then the +timeseries is returned with the label `dst_label` replaced by the expansion of +`replacement`. `$1` is replaced with the first matching subgroup, `$2` with the +second etc. If the regular expression doesn't match then the timeseries is +returned unchanged. + +This example will return a vector with each time series having a `foo` +label with the value `a` added to it: + +``` +label_replace(up{job="api-server",service="a:c"}, "foo", "$1", "service", "(.*):.*") +``` + +## `ln()` + +`ln(v instant-vector)` calculates the natural logarithm for all elements in `v`. +Special cases are: + +* `ln(+Inf) = +Inf` +* `ln(0) = -Inf` +* `ln(x < 0) = NaN` +* `ln(NaN) = NaN` + +## `log2()` + +`log2(v instant-vector)` calculates the binary logarithm for all elements in `v`. +The special cases are equivalent to those in `ln`. + +## `log10()` + +`log10(v instant-vector)` calculates the decimal logarithm for all elements in `v`. +The special cases are equivalent to those in `ln`. + +## `minute()` + +`minute(v=vector(time()) instant-vector)` returns the minute of the hour for each +of the given times in UTC. Returned values are from 0 to 59. + +## `month()` + +`month(v=vector(time()) instant-vector)` returns the month of the year for each +of the given times in UTC. Returned values are from 1 to 12, where 1 means +January etc. + +## `predict_linear()` + +`predict_linear(v range-vector, t scalar)` predicts the value of time series +`t` seconds from now, based on the range vector `v`, using [simple linear +regression](http://en.wikipedia.org/wiki/Simple_linear_regression). + +`predict_linear` should only be used with gauges. + +## `rate()` + +`rate(v range-vector)` calculates the per-second average rate of increase of the +time series in the range vector. Breaks in monotonicity (such as counter +resets due to target restarts) are automatically adjusted for. Also, the +calculation extrapolates to the ends of the time range, allowing for missed +scrapes or imperfect alignment of scrape cycles with the range's time period. + +The following example expression returns the per-second rate of HTTP requests as measured +over the last 5 minutes, per time series in the range vector: + +``` +rate(http_requests_total{job="api-server"}[5m]) +``` + +`rate` should only be used with counters. It is best suited for alerting, +and for graphing of slow-moving counters. + +Note that when combining `rate()` with an aggregation operator (e.g. `sum()`) +or a function aggregating over time (any function ending in `_over_time`), +always take a `rate()` first, then aggregate. Otherwise `rate()` cannot detect +counter resets when your target restarts. + +## `resets()` + +For each input time series, `resets(v range-vector)` returns the number of +counter resets within the provided time range as an instant vector. Any +decrease in the value between two consecutive samples is interpreted as a +counter reset. + +`resets` should only be used with counters. 
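+
+As a purely illustrative sketch (reusing the `http_requests_total{job="api-server"}`
+series from the `rate()` and `increase()` examples above; the one-hour window is
+arbitrary), the following returns, per series, how many times the counter was
+reset within the last hour:
+
+```
+resets(http_requests_total{job="api-server"}[1h])
+```
+
+A persistently non-zero result can hint at frequently restarting targets.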
+ +## `round()` + +`round(v instant-vector, to_nearest=1 scalar)` rounds the sample values of all +elements in `v` to the nearest integer. Ties are resolved by rounding up. The +optional `to_nearest` argument allows specifying the nearest multiple to which +the sample values should be rounded. This multiple may also be a fraction. + +## `scalar()` + +Given a single-element input vector, `scalar(v instant-vector)` returns the +sample value of that single element as a scalar. If the input vector does not +have exactly one element, `scalar` will return `NaN`. + +## `sort()` + +`sort(v instant-vector)` returns vector elements sorted by their sample values, +in ascending order. + +## `sort_desc()` + +Same as `sort`, but sorts in descending order. + +## `sqrt()` + +`sqrt(v instant-vector)` calculates the square root of all elements in `v`. + +## `time()` + +`time()` returns the number of seconds since January 1, 1970 UTC. Note that +this does not actually return the current time, but the time at which the +expression is to be evaluated. + +## `vector()` + +`vector(s scalar)` returns the scalar `s` as a vector with no labels. + +## `year()` + +`year(v=vector(time()) instant-vector)` returns the year +for each of the given times in UTC. + +## `_over_time()` + +The following functions allow aggregating each series of a given range vector +over time and return an instant vector with per-series aggregation results: + +* `avg_over_time(range-vector)`: the average value of all points in the specified interval. +* `min_over_time(range-vector)`: the minimum value of all points in the specified interval. +* `max_over_time(range-vector)`: the maximum value of all points in the specified interval. +* `sum_over_time(range-vector)`: the sum of all values in the specified interval. +* `count_over_time(range-vector)`: the count of all values in the specified interval. +* `quantile_over_time(scalar, range-vector)`: the φ-quantile (0 ≤ φ ≤ 1) of the values in the specified interval. +* `stddev_over_time(range-vector)`: the population standard deviation of the values in the specified interval. +* `stdvar_over_time(range-vector)`: the population standard variance of the values in the specified interval. + +Note that all values in the specified interval have the same weight in the +aggregation even if the values are not equally spaced throughout the interval. diff --git a/docs/querying/index.md b/docs/querying/index.md new file mode 100644 index 0000000000..1566750e89 --- /dev/null +++ b/docs/querying/index.md @@ -0,0 +1,4 @@ +--- +title: Querying +sort_rank: 4 +--- diff --git a/docs/querying/operators.md b/docs/querying/operators.md new file mode 100644 index 0000000000..7aa7a6b79b --- /dev/null +++ b/docs/querying/operators.md @@ -0,0 +1,250 @@ +--- +title: Operators +sort_rank: 2 +--- + +# Operators + +## Binary operators + +Prometheus's query language supports basic logical and arithmetic operators. +For operations between two instant vectors, the [matching behavior](#vector-matching) +can be modified. + +### Arithmetic binary operators + +The following binary arithmetic operators exist in Prometheus: + +* `+` (addition) +* `-` (subtraction) +* `*` (multiplication) +* `/` (division) +* `%` (modulo) +* `^` (power/exponentiation) + +Binary arithmetic operators are defined between scalar/scalar, vector/scalar, +and vector/vector value pairs. + +**Between two scalars**, the behavior is obvious: they evaluate to another +scalar that is the result of the operator applied to both scalar operands. 
+ +**Between an instant vector and a scalar**, the operator is applied to the +value of every data sample in the vector. E.g. if a time series instant vector +is multiplied by 2, the result is another vector in which every sample value of +the original vector is multiplied by 2. + +**Between two instant vectors**, a binary arithmetic operator is applied to +each entry in the left-hand-side vector and its [matching element](#vector-matching) +in the right hand vector. The result is propagated into the result vector and the metric +name is dropped. Entries for which no matching entry in the right-hand vector can be +found are not part of the result. + +### Comparison binary operators + +The following binary comparison operators exist in Prometheus: + +* `==` (equal) +* `!=` (not-equal) +* `>` (greater-than) +* `<` (less-than) +* `>=` (greater-or-equal) +* `<=` (less-or-equal) + +Comparison operators are defined between scalar/scalar, vector/scalar, +and vector/vector value pairs. By default they filter. Their behaviour can be +modified by providing `bool` after the operator, which will return `0` or `1` +for the value rather than filtering. + +**Between two scalars**, the `bool` modifier must be provided and these +operators result in another scalar that is either `0` (`false`) or `1` +(`true`), depending on the comparison result. + +**Between an instant vector and a scalar**, these operators are applied to the +value of every data sample in the vector, and vector elements between which the +comparison result is `false` get dropped from the result vector. If the `bool` +modifier is provided, vector elements that would be dropped instead have the value +`0` and vector elements that would be kept have the value `1`. + +**Between two instant vectors**, these operators behave as a filter by default, +applied to matching entries. Vector elements for which the expression is not +true or which do not find a match on the other side of the expression get +dropped from the result, while the others are propagated into a result vector +with their original (left-hand-side) metric names and label values. +If the `bool` modifier is provided, vector elements that would have been +dropped instead have the value `0` and vector elements that would be kept have +the value `1` with the left-hand-side metric names and label values. + +### Logical/set binary operators + +These logical/set binary operators are only defined between instant vectors: + +* `and` (intersection) +* `or` (union) +* `unless` (complement) + +`vector1 and vector2` results in a vector consisting of the elements of +`vector1` for which there are elements in `vector2` with exactly matching +label sets. Other elements are dropped. The metric name and values are carried +over from the left-hand-side vector. + +`vector1 or vector2` results in a vector that contains all original elements +(label sets + values) of `vector1` and additionally all elements of `vector2` +which do not have matching label sets in `vector1`. + +`vector1 unless vector2` results in a vector consisting of the elements of +`vector1` for which there are no elements in `vector2` with exactly matching +label sets. All matching elements in both vectors are dropped. + +## Vector matching + +Operations between vectors attempt to find a matching element in the right-hand-side +vector for each entry in the left-hand side. There are two basic types of +matching behavior: + +**One-to-one** finds a unique pair of entries from each side of the operation. 
+
+In the default case, that is an operation following the format `vector1 <operator> vector2`.
+Two entries match if they have the exact same set of labels and corresponding values.
+The `ignoring` keyword allows ignoring certain labels when matching, while the
+`on` keyword allows reducing the set of considered labels to a provided list:
+
+    <vector expr> <bin-op> ignoring(