From a03193232a9b0123508628c57b6a10ebb16a56af Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Thu, 5 Oct 2017 12:32:24 +0100 Subject: [PATCH 1/6] Don't disable HTTP keep-alives for remote storage connections. (#3173) Removes configurability introduced in #3160 in favour of hard-coding, per advice from @brian-brazil. --- config/config.go | 2 -- storage/remote/client.go | 7 +------ util/httputil/client.go | 10 ++++++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/config/config.go b/config/config.go index 983ba1999e..147600e827 100644 --- a/config/config.go +++ b/config/config.go @@ -520,8 +520,6 @@ type HTTPClientConfig struct { ProxyURL URL `yaml:"proxy_url,omitempty"` // TLSConfig to use to connect to the targets. TLSConfig TLSConfig `yaml:"tls_config,omitempty"` - // If set, override whether to use HTTP KeepAlive - scraping defaults OFF, remote read/write defaults ON - KeepAlive *bool `yaml:"keep_alive,omitempty"` // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` diff --git a/storage/remote/client.go b/storage/remote/client.go index 01bcd9c8f5..1f2b07dff6 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -52,12 +52,7 @@ type ClientConfig struct { // NewClient creates a new Client. func NewClient(index int, conf *ClientConfig) (*Client, error) { - // If not specified in config, allow HTTP connections for remote API to use keep-alive - if conf.HTTPClientConfig.KeepAlive == nil { - val := true - conf.HTTPClientConfig.KeepAlive = &val - } - httpClient, err := httputil.NewClientFromConfig(conf.HTTPClientConfig) + httpClient, err := httputil.NewClientFromConfigAndOptions(conf.HTTPClientConfig, false) if err != nil { return nil, err } diff --git a/util/httputil/client.go b/util/httputil/client.go index 1c48030d4e..66ec2b294a 100644 --- a/util/httputil/client.go +++ b/util/httputil/client.go @@ -32,14 +32,16 @@ func NewClient(rt http.RoundTripper) *http.Client { // NewClientFromConfig returns a new HTTP client configured for the // given config.HTTPClientConfig. func NewClientFromConfig(cfg config.HTTPClientConfig) (*http.Client, error) { + return NewClientFromConfigAndOptions(cfg, true) +} + +// NewClientFromConfigAndOptions returns a new HTTP client configured for the +// given config.HTTPClientConfig, optionally disabling HTTP keepalive. +func NewClientFromConfigAndOptions(cfg config.HTTPClientConfig, disableKeepAlives bool) (*http.Client, error) { tlsConfig, err := NewTLSConfig(cfg.TLSConfig) if err != nil { return nil, err } - disableKeepAlives := true // hard-coded default unless overridden in config - if cfg.KeepAlive != nil { - disableKeepAlives = !*cfg.KeepAlive - } // The only timeout we care about is the configured scrape timeout. // It is applied on request. So we leave out any timings here. var rt http.RoundTripper = &http.Transport{ From 7e814ada72dd2d60d44e1b72741ac9e4ba944683 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 5 Oct 2017 14:56:52 -0700 Subject: [PATCH 2/6] [storage/local] fix timer.Reset usage According to https://golang.org/pkg/time/#Timer.Reset you must not rely on a returned value from Reset. Signed-off-by: Alexander Morozov --- storage/local/storage.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/storage/local/storage.go b/storage/local/storage.go index d221097b3c..e8039faa0f 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -998,7 +998,8 @@ func (s *MemorySeriesStorage) logThrottling() { for { select { case <-s.throttled: - if !timer.Reset(time.Minute) { + if !timer.Stop() { + <-timer.C score, _ := s.getPersistenceUrgencyScore() log. With("urgencyScore", score). @@ -1006,6 +1007,7 @@ func (s *MemorySeriesStorage) logThrottling() { With("memoryChunks", atomic.LoadInt64(&chunk.NumMemChunks)). Error("Storage needs throttling. Scrapes and rule evaluations will be skipped.") } + timer.Reset(time.Minute) case <-timer.C: score, _ := s.getPersistenceUrgencyScore() log. From 128b31d058cefe4907f28de462393fbeffc5440c Mon Sep 17 00:00:00 2001 From: Jack Neely Date: Fri, 6 Oct 2017 06:22:55 -0400 Subject: [PATCH 3/6] Log failure to send NaN values to remote store as Debug (#3235) This was a warning and can be a frequent occurrence. Let's not fill up logs unless we are asked to. --- .../remote_storage/remote_storage_adapter/graphite/client.go | 2 +- .../remote_storage/remote_storage_adapter/opentsdb/client.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go index 45373cf19a..8b96c45b3b 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go @@ -86,7 +86,7 @@ func (c *Client) Write(samples model.Samples) error { t := float64(s.Timestamp.UnixNano()) / 1e9 v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - log.Warnf("cannot send value %f to Graphite,"+ + log.Debugf("cannot send value %f to Graphite,"+ "skipping sample %#v", v, s) continue } diff --git a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go index 4b6c0e6f4a..47aefc7418 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go @@ -75,7 +75,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - log.Warnf("cannot send value %f to OpenTSDB, skipping sample %#v", v, s) + log.Debugf("cannot send value %f to OpenTSDB, skipping sample %#v", v, s) continue } metric := TagValue(s.Metric[model.MetricNameLabel]) From 2fad91d25a957d4329140889ed501957c72c3651 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 6 Oct 2017 14:19:30 +0200 Subject: [PATCH 4/6] Avoid blocking in the logThrottling loop The timer semantics is really hard. The simple pattern as given in the godoc for the time package assumes we are not elsewhere consuming from the timer's channel. However, exactly that can happen here with the right sequence of events. Thus, we have to drain the channel only if it has something to drain. --- storage/local/storage.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/storage/local/storage.go b/storage/local/storage.go index e8039faa0f..c1caef67e1 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -999,7 +999,10 @@ func (s *MemorySeriesStorage) logThrottling() { select { case <-s.throttled: if !timer.Stop() { - <-timer.C + select { + case <-timer.C: + default: + } score, _ := s.getPersistenceUrgencyScore() log. With("urgencyScore", score). From f20e6a0ae4ed63db328aea16c20c49f8dd8a5750 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 6 Oct 2017 17:20:20 +0200 Subject: [PATCH 5/6] Only respond to API requests once the server is ready --- web/api/v1/api.go | 17 +++++++++++++---- web/api/v1/api_test.go | 3 ++- web/web.go | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index a42da28b40..131ed70ea4 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -107,10 +107,18 @@ type API struct { now func() model.Time config func() config.Config + ready func(http.HandlerFunc) http.HandlerFunc } // NewAPI returns an initialized API type. -func NewAPI(qe *promql.Engine, st local.Storage, tr targetRetriever, ar alertmanagerRetriever, configFunc func() config.Config) *API { +func NewAPI( + qe *promql.Engine, + st local.Storage, + tr targetRetriever, + ar alertmanagerRetriever, + configFunc func() config.Config, + readyFunc func(http.HandlerFunc) http.HandlerFunc, +) *API { return &API{ QueryEngine: qe, Storage: st, @@ -118,6 +126,7 @@ func NewAPI(qe *promql.Engine, st local.Storage, tr targetRetriever, ar alertman alertmanagerRetriever: ar, now: model.Now, config: configFunc, + ready: readyFunc, } } @@ -134,9 +143,9 @@ func (api *API) Register(r *route.Router) { w.WriteHeader(http.StatusNoContent) } }) - return prometheus.InstrumentHandler(name, httputil.CompressionHandler{ + return api.ready(prometheus.InstrumentHandler(name, httputil.CompressionHandler{ Handler: hf, - }) + })) } r.Options("/*path", instr("options", api.options)) @@ -153,7 +162,7 @@ func (api *API) Register(r *route.Router) { r.Get("/alertmanagers", instr("alertmanagers", api.alertmanagers)) r.Get("/status/config", instr("config", api.serveConfig)) - r.Post("/read", prometheus.InstrumentHandler("read", http.HandlerFunc(api.remoteRead))) + r.Post("/read", api.ready(prometheus.InstrumentHandler("read", http.HandlerFunc(api.remoteRead)))) } type queryData struct { diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 370795d26a..463fb1d40d 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -104,6 +104,7 @@ func TestEndpoints(t *testing.T) { config: func() config.Config { return samplePrometheusCfg }, + ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, } start := model.Time(0) @@ -723,7 +724,7 @@ func TestParseDuration(t *testing.T) { func TestOptionsMethod(t *testing.T) { r := route.New() - api := &API{} + api := &API{ready: func(f http.HandlerFunc) http.HandlerFunc { return f }} api.Register(r) s := httptest.NewServer(r) diff --git a/web/web.go b/web/web.go index 0f87f6b942..396853d020 100644 --- a/web/web.go +++ b/web/web.go @@ -172,6 +172,7 @@ func New(o *Options) *Handler { defer h.mtx.RUnlock() return *h.config }, + h.testReady, ) if o.RoutePrefix != "/" { From 5829e4880d54451c41fda072f0a659f1037ad6a8 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 6 Oct 2017 16:30:09 +0200 Subject: [PATCH 6/6] Cut v1.8.0 --- CHANGELOG.md | 30 ++++++++++++++++++++++++++++++ VERSION | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73a133ffbd..ebd71eade9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,33 @@ +## 1.8.0 / 2017-10-06 + +* [CHANGE] Rule links link to the _Console_ tab rather than the _Graph_ tab to + not trigger expensive range queries by default. +* [FEATURE] Ability to act as a remote read endpoint for other Prometheus + servers. +* [FEATURE] K8s SD: Support discovery of ingresses. +* [FEATURE] Consul SD: Support for node metadata. +* [FEATURE] Openstack SD: Support discovery of hypervisors. +* [FEATURE] Expose current Prometheus config via `/status/config`. +* [FEATURE] Allow to collapse jobs on `/targets` page. +* [FEATURE] Add `/-/healthy` and `/-/ready` endpoints. +* [FEATURE] Add color scheme support to console templates. +* [ENHANCEMENT] Remote storage connections use HTTP keep-alive. +* [ENHANCEMENT] Improved logging about remote storage. +* [ENHANCEMENT] Relaxed URL validation. +* [ENHANCEMENT] Openstack SD: Handle instances without IP. +* [ENHANCEMENT] Make remote storage queue manager configurable. +* [ENHANCEMENT] Validate metrics returned from remote read. +* [ENHANCEMENT] EC2 SD: Set a default region. +* [ENHANCEMENT] Changed help link to `https://prometheus.io/docs`. +* [BUGFIX] Fix floating-point precision issue in `deriv` function. +* [BUGFIX] Fix pprof endpoints when -web.route-prefix or -web.external-url is + used. +* [BUGFIX] Fix handling of `null` target groups in file-based SD. +* [BUGFIX] Set the sample timestamp in date-related PromQL functions. +* [BUGFIX] Apply path prefix to redirect from deprecated graph URL. +* [BUGFIX] Fixed tests on MS Windows. +* [BUGFIX] Check for invalid UTF-8 in label values after relabeling. + ## 1.7.2 / 2017-09-26 * [BUGFIX] Correctly remove all targets from DNS service discovery if the diff --git a/VERSION b/VERSION index f8a696c8dc..27f9cd322b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.2 +1.8.0