From b16371595daa6eddff192b63d3f570d2a32bb858 Mon Sep 17 00:00:00 2001
From: Julius Volz
Date: Tue, 31 Jan 2017 17:44:22 +0100
Subject: [PATCH 01/37] Add standalone remote storage bridge example

In preparation for removing specific remote storage implementations,
this offers an example of how to achieve the same in a separate process.

Rather than having three separate bridges for OpenTSDB, InfluxDB, and
Graphite, I decided to support all in one binary.

For now, this is in the example documentation directory, but perhaps we
will want to make a first-class project / repository out of it.
---
 .../{ => example_receiver}/README.md          |   2 +-
 .../{ => example_receiver}/server.go          |   0
 .../remote_storage_bridge/README.md           |  35 +++
 .../remote_storage_bridge/main.go             | 231 ++++++++++++++++++
 4 files changed, 267 insertions(+), 1 deletion(-)
 rename documentation/examples/remote_storage/{ => example_receiver}/README.md (94%)
 rename documentation/examples/remote_storage/{ => example_receiver}/server.go (100%)
 create mode 100644 documentation/examples/remote_storage/remote_storage_bridge/README.md
 create mode 100644 documentation/examples/remote_storage/remote_storage_bridge/main.go

diff --git a/documentation/examples/remote_storage/README.md b/documentation/examples/remote_storage/example_receiver/README.md
similarity index 94%
rename from documentation/examples/remote_storage/README.md
rename to documentation/examples/remote_storage/example_receiver/README.md
index 483bb22dce..3a0be8c0ba 100644
--- a/documentation/examples/remote_storage/README.md
+++ b/documentation/examples/remote_storage/example_receiver/README.md
@@ -7,7 +7,7 @@ To use it:
 
 ```
 go build
-./remote_storage
+./example_receiver
 ```
 
 ...and then add the following to your `prometheus.yml`:
diff --git a/documentation/examples/remote_storage/server.go b/documentation/examples/remote_storage/example_receiver/server.go
similarity index 100%
rename from documentation/examples/remote_storage/server.go
rename to documentation/examples/remote_storage/example_receiver/server.go
diff --git a/documentation/examples/remote_storage/remote_storage_bridge/README.md b/documentation/examples/remote_storage/remote_storage_bridge/README.md
new file mode 100644
index 0000000000..ad194c7169
--- /dev/null
+++ b/documentation/examples/remote_storage/remote_storage_bridge/README.md
@@ -0,0 +1,35 @@
+# Remote storage bridge
+
+This is a bridge that receives samples in Prometheus's remote storage
+format and forwards them to Graphite, InfluxDB, or OpenTSDB. It is meant
+as a replacement for the built-in specific remote storage implementations
+that have been removed from Prometheus.
+ +## Building + +``` +go build +``` + +## Running + +Example: + +``` +./remote_storage_bridge -graphite-address=localhost:8080 -opentsdb-url=http://localhost:8081/ +``` + +To show all flags: + +``` +./remote_storage_bridge -h +``` + +## Configuring Prometheus + +To configure Prometheus to send samples to this bridge, add the following to your `prometheus.yml`: + +```yaml +remote_write: + url: "http://localhost:9201/receive" +``` \ No newline at end of file diff --git a/documentation/examples/remote_storage/remote_storage_bridge/main.go b/documentation/examples/remote_storage/remote_storage_bridge/main.go new file mode 100644 index 0000000000..b5b3f12ab9 --- /dev/null +++ b/documentation/examples/remote_storage/remote_storage_bridge/main.go @@ -0,0 +1,231 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The main package for the Prometheus server executable. +package main + +import ( + "flag" + "io/ioutil" + "net/http" + _ "net/http/pprof" + "net/url" + "os" + "sync" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/storage/remote" + "github.com/prometheus/prometheus/storage/remote/graphite" + "github.com/prometheus/prometheus/storage/remote/influxdb" + "github.com/prometheus/prometheus/storage/remote/opentsdb" + + influx "github.com/influxdb/influxdb/client" +) + +type config struct { + graphiteAddress string + graphiteTransport string + graphitePrefix string + opentsdbURL string + influxdbURL string + influxdbRetentionPolicy string + influxdbUsername string + influxdbDatabase string + influxdbPassword string + remoteTimeout time.Duration + listenAddr string + telemetryPath string +} + +var ( + receivedSamples = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "received_samples_total", + Help: "Total number of received samples.", + }, + ) + sentSamples = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "sent_samples_total", + Help: "Total number of processed samples sent to remote storage.", + }, + []string{"remote"}, + ) + failedSamples = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "failed_samples_total", + Help: "Total number of processed samples which failed on send to remote storage.", + }, + []string{"remote"}, + ) + sentBatchDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "sent_batch_duration_seconds", + Help: "Duration of sample batch send calls to the remote storage.", + Buckets: prometheus.DefBuckets, + }, + []string{"remote"}, + ) +) + +func init() { + prometheus.MustRegister(receivedSamples) + prometheus.MustRegister(sentSamples) + prometheus.MustRegister(failedSamples) + prometheus.MustRegister(sentBatchDuration) +} + +func main() { + cfg := parseFlags() + http.Handle(cfg.telemetryPath, prometheus.Handler()) + + clients := buildClients(cfg) + serve(cfg.listenAddr, clients) +} + 
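For context on the `/receive` handler registered in `serve` below: Prometheus sends a snappy-compressed, protobuf-encoded `WriteRequest`, and the handler reads it with `snappy.NewReader`, i.e. the snappy *stream* format rather than the block format of `snappy.Encode`. Here is a minimal, hypothetical client sketch; `sendToBridge` is illustrative and not part of the patch, and the request fields are simply the ones `protoToSamples` below reads:

```
package main

import (
	"bytes"
	"log"
	"net/http"

	"github.com/gogo/protobuf/proto"
	"github.com/golang/snappy"
	"github.com/prometheus/prometheus/storage/remote"
)

// sendToBridge posts one WriteRequest to the bridge's receive endpoint.
func sendToBridge(url string, req *remote.WriteRequest) error {
	data, err := proto.Marshal(req) // same encoding the handler unmarshals
	if err != nil {
		return err
	}
	var buf bytes.Buffer
	// Stream framing, matching snappy.NewReader on the server side.
	sw := snappy.NewWriter(&buf)
	if _, err := sw.Write(data); err != nil {
		return err
	}
	resp, err := http.Post(url, "application/x-protobuf", &buf)
	if err != nil {
		return err
	}
	return resp.Body.Close()
}

func main() {
	// Even an empty WriteRequest round-trips; real senders fill in
	// Timeseries with the Labels and Samples fields shown below.
	if err := sendToBridge("http://localhost:9201/receive", &remote.WriteRequest{}); err != nil {
		log.Fatal(err)
	}
}
```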
+func parseFlags() *config {
+	cfg := &config{
+		influxdbPassword: os.Getenv("INFLUXDB_PW"),
+	}
+
+	flag.StringVar(&cfg.graphiteAddress, "graphite-address", "",
+		"The host:port of the Graphite server to send samples to. None, if empty.",
+	)
+	flag.StringVar(&cfg.graphiteTransport, "graphite-transport", "tcp",
+		"Transport protocol to use to communicate with Graphite. 'tcp', if empty.",
+	)
+	flag.StringVar(&cfg.graphitePrefix, "graphite-prefix", "",
+		"The prefix to prepend to all metrics exported to Graphite. None, if empty.",
+	)
+	flag.StringVar(&cfg.opentsdbURL, "opentsdb-url", "",
+		"The URL of the remote OpenTSDB server to send samples to. None, if empty.",
+	)
+	flag.StringVar(&cfg.influxdbURL, "influxdb-url", "",
+		"The URL of the remote InfluxDB server to send samples to. None, if empty.",
+	)
+	flag.StringVar(&cfg.influxdbRetentionPolicy, "influxdb.retention-policy", "default",
+		"The InfluxDB retention policy to use.",
+	)
+	flag.StringVar(&cfg.influxdbUsername, "influxdb.username", "",
+		"The username to use when sending samples to InfluxDB. The corresponding password must be provided via the INFLUXDB_PW environment variable.",
+	)
+	flag.StringVar(&cfg.influxdbDatabase, "influxdb.database", "prometheus",
+		"The name of the database to use for storing samples in InfluxDB.",
+	)
+	flag.DurationVar(&cfg.remoteTimeout, "send-timeout", 30*time.Second,
+		"The timeout to use when sending samples to the remote storage.",
+	)
+	flag.StringVar(&cfg.listenAddr, "web.listen-address", ":9201", "Address to listen on for web endpoints.")
+	flag.StringVar(&cfg.telemetryPath, "web.telemetry-path", "/metrics", "Path under which to expose metrics.")
+
+	flag.Parse()
+
+	return cfg
+}
+
+func buildClients(cfg *config) []remote.StorageClient {
+	var clients []remote.StorageClient
+	if cfg.graphiteAddress != "" {
+		c := graphite.NewClient(
+			cfg.graphiteAddress, cfg.graphiteTransport,
+			cfg.remoteTimeout, cfg.graphitePrefix)
+		clients = append(clients, c)
+	}
+	if cfg.opentsdbURL != "" {
+		c := opentsdb.NewClient(cfg.opentsdbURL, cfg.remoteTimeout)
+		clients = append(clients, c)
+	}
+	if cfg.influxdbURL != "" {
+		url, err := url.Parse(cfg.influxdbURL)
+		if err != nil {
+			log.Fatalf("Failed to parse InfluxDB URL %q: %v", cfg.influxdbURL, err)
+		}
+		conf := influx.Config{
+			URL:      *url,
+			Username: cfg.influxdbUsername,
+			Password: cfg.influxdbPassword,
+			Timeout:  cfg.remoteTimeout,
+		}
+		c := influxdb.NewClient(conf, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy)
+		prometheus.MustRegister(c)
+		clients = append(clients, c)
+	}
+	return clients
+}
+
+func serve(addr string, clients []remote.StorageClient) error {
+	http.HandleFunc("/receive", func(w http.ResponseWriter, r *http.Request) {
+		reqBuf, err := ioutil.ReadAll(snappy.NewReader(r.Body))
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		var req remote.WriteRequest
+		if err := proto.Unmarshal(reqBuf, &req); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		samples := protoToSamples(&req)
+		receivedSamples.Add(float64(len(samples)))
+
+		var wg sync.WaitGroup
+		for _, c := range clients {
+			wg.Add(1)
+			go func(rc remote.StorageClient) {
+				sendSamples(rc, samples)
+				wg.Done()
+			}(c)
+		}
+		wg.Wait()
+	})
+
+	return http.ListenAndServe(addr, nil)
+}
+
+func protoToSamples(req *remote.WriteRequest) model.Samples {
+	var samples model.Samples
+	for _, ts := range req.Timeseries {
+		metric := make(model.Metric, len(ts.Labels))
+		for _, l := range ts.Labels {
+			metric[model.LabelName(l.Name)] = model.LabelValue(l.Value)
+		}
+
+		for _, s := range ts.Samples {
+			samples = append(samples, &model.Sample{
+				Metric:    metric,
+				Value:     model.SampleValue(s.Value),
+				Timestamp: model.Time(s.TimestampMs),
+			})
+		}
+	}
+	return samples
+}
+
+func sendSamples(c remote.StorageClient, samples model.Samples) {
+	begin := time.Now()
+	err := c.Store(samples)
+	duration := time.Since(begin).Seconds()
+	if err != nil {
+		log.Warnf("Error sending %d samples to remote storage %q: %v", len(samples), c.Name(), err)
+		failedSamples.WithLabelValues(c.Name()).Add(float64(len(samples)))
+	}
+	sentSamples.WithLabelValues(c.Name()).Add(float64(len(samples)))
+	sentBatchDuration.WithLabelValues(c.Name()).Observe(duration)
+}

From b2f086c6c43cf4b1cc0ed5dc00ddd6ea45c76f96 Mon Sep 17 00:00:00 2001
From: beorn7
Date: Wed, 1 Feb 2017 15:34:03 +0100
Subject: [PATCH 02/37] storage: Expose bug of not setting the shrink ratio in
 the constructor
---
 storage/local/persistence_test.go | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/storage/local/persistence_test.go b/storage/local/persistence_test.go
index 27f620366b..e88b17592b 100644
--- a/storage/local/persistence_test.go
+++ b/storage/local/persistence_test.go
@@ -42,7 +42,7 @@ var (
 func newTestPersistence(t *testing.T, encoding chunk.Encoding) (*persistence, testutil.Closer) {
 	chunk.DefaultEncoding = encoding
 	dir := testutil.NewTemporaryDirectory("test_persistence", t)
-	p, err := newPersistence(dir.Path(), false, false, func() bool { return false }, 0.1)
+	p, err := newPersistence(dir.Path(), false, false, func() bool { return false }, 0.15)
 	if err != nil {
 		dir.Close()
 		t.Fatal(err)
@@ -173,6 +173,25 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunk.Encoding) {
 		}
 	}
 
+	// Try to drop one chunk, which must be prevented by the shrink ratio.
+	for fp, _ := range fpToChunks {
+		firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 1, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if offset != 0 {
+			t.Errorf("want offset 0, got %d", offset)
+		}
+		if firstTime != 0 {
+			t.Errorf("want first time 0, got %d", firstTime)
+		}
+		if numDropped != 0 {
+			t.Errorf("want 0 dropped chunks, got %v", numDropped)
+		}
+		if allDropped {
+			t.Error("all chunks dropped")
+		}
+	}
 	// Drop half of the chunks.
 	for fp, expectedChunks := range fpToChunks {
 		firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 5, nil)

From 4ccfc93dcfd2891a2fa9943175186bd796bcadb5 Mon Sep 17 00:00:00 2001
From: beorn7
Date: Wed, 1 Feb 2017 15:36:38 +0100
Subject: [PATCH 03/37] storage: Set shrink ratio in the constructor.
---
 storage/local/persistence.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/storage/local/persistence.go b/storage/local/persistence.go
index 4d27334969..5015fb3c6b 100644
--- a/storage/local/persistence.go
+++ b/storage/local/persistence.go
@@ -310,6 +310,7 @@ func newPersistence(
 		dirtyFileName: dirtyPath,
 		fLock:         fLock,
 		shouldSync:    shouldSync,
+		minShrinkRatio: minShrinkRatio,
 		// Create buffers of length 3*chunkLenWithHeader by default because that is still reasonably small
 		// and at the same time enough for many uses. The contract is to never return buffer smaller than
 		// that to the pool so that callers can rely on a minimum buffer size.

From 34767c2221686edd4c69c586778137e169fc7284 Mon Sep 17 00:00:00 2001
From: Brian Brazil
Date: Wed, 1 Feb 2017 19:49:50 +0000
Subject: [PATCH 04/37] Clone lset before relabelling.
(#2386) We need to not change the lset passed into populateLabels, as that is kept around by the SDs. Fixes 2377 --- retrieval/target.go | 1 + retrieval/targetmanager_test.go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/retrieval/target.go b/retrieval/target.go index ca5cc0db7b..07628c70e7 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -305,6 +305,7 @@ func (app *countingAppender) Append(s *model.Sample) error { // It returns a label set before relabeling was applied as the second return value. // Returns a nil label set if the target is dropped during relabeling. func populateLabels(lset model.LabelSet, cfg *config.ScrapeConfig) (res, orig model.LabelSet, err error) { + lset = lset.Clone() if _, ok := lset[model.AddressLabel]; !ok { return nil, nil, fmt.Errorf("no address") } diff --git a/retrieval/targetmanager_test.go b/retrieval/targetmanager_test.go index dc4568a955..a8b6292047 100644 --- a/retrieval/targetmanager_test.go +++ b/retrieval/targetmanager_test.go @@ -140,10 +140,14 @@ func TestPopulateLabels(t *testing.T) { }, } for i, c := range cases { + in := c.in.Clone() res, orig, err := populateLabels(c.in, c.cfg) if err != nil { t.Fatalf("case %d: %s", i, err) } + if !reflect.DeepEqual(c.in, in) { + t.Errorf("case %d: input lset was changed was\n\t%+v\n now\n\t%+v", i, in, c.in) + } if !reflect.DeepEqual(res, c.res) { t.Errorf("case %d: expected res\n\t%+v\n got\n\t%+v", i, c.res, res) } From 752fac60ae4a07e742dd5908120d8bc3b9fe656b Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 1 Feb 2017 19:41:15 +0100 Subject: [PATCH 05/37] storage: Remove race condition from TestLoop --- storage/local/storage_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/storage/local/storage_test.go b/storage/local/storage_test.go index d63b6d3ab1..591e3df37b 100644 --- a/storage/local/storage_test.go +++ b/storage/local/storage_test.go @@ -840,10 +840,15 @@ func TestLoop(t *testing.T) { storage.Append(s) } storage.WaitForIndexing() - series, _ := storage.fpToSeries.get(model.Metric{}.FastFingerprint()) + fp := model.Metric{}.FastFingerprint() + series, _ := storage.fpToSeries.get(fp) + storage.fpLocker.Lock(fp) cdsBefore := len(series.chunkDescs) + storage.fpLocker.Unlock(fp) time.Sleep(fpMaxWaitDuration + time.Second) // TODO(beorn7): Ugh, need to wait for maintenance to kick in. + storage.fpLocker.Lock(fp) cdsAfter := len(series.chunkDescs) + storage.fpLocker.Unlock(fp) storage.Stop() if cdsBefore <= cdsAfter { t.Errorf( From 65dc8f44d3c7cfb8981026afdacd67aa7cd2dfa7 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 1 Feb 2017 20:14:01 +0100 Subject: [PATCH 06/37] storage: Test for errors returned by MaybePopulateLastTime --- storage/local/heads.go | 4 +++- storage/local/persistence.go | 2 +- storage/local/series.go | 13 +++++++------ storage/local/storage.go | 7 ++++++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/storage/local/heads.go b/storage/local/heads.go index 14a3175926..15d2fcabbb 100644 --- a/storage/local/heads.go +++ b/storage/local/heads.go @@ -188,7 +188,9 @@ func (hs *headsScanner) scan() bool { // This is NOT the head chunk. So it's a chunk // to be persisted, and we need to populate lastTime. 
hs.chunksToPersistTotal++ - cd.MaybePopulateLastTime() + if hs.err = cd.MaybePopulateLastTime(); hs.err != nil { + return false + } } chunkDescs[i] = cd } diff --git a/storage/local/persistence.go b/storage/local/persistence.go index 5015fb3c6b..ebaceeaea5 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -693,7 +693,7 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap } // persistWatermark. We only checkpoint chunks that need persisting, so // this is always 0. - if _, err = codable.EncodeVarint(w, int64(0)); err != nil { + if _, err = codable.EncodeVarint(w, 0); err != nil { return } if m.series.modTime.IsZero() { diff --git a/storage/local/series.go b/storage/local/series.go index 4a97c3c5c3..bb4ee6bc54 100644 --- a/storage/local/series.go +++ b/storage/local/series.go @@ -247,7 +247,9 @@ func (s *memorySeries) add(v model.SamplePair) (int, error) { // Populate lastTime of now-closed chunks. for _, cd := range s.chunkDescs[len(s.chunkDescs)-len(chunks) : len(s.chunkDescs)-1] { - cd.MaybePopulateLastTime() + if err := cd.MaybePopulateLastTime(); err != nil { + return 0, err + } } s.lastTime = v.Timestamp @@ -261,19 +263,18 @@ func (s *memorySeries) add(v model.SamplePair) (int, error) { // If the head chunk is already closed, the method is a no-op and returns false. // // The caller must have locked the fingerprint of the series. -func (s *memorySeries) maybeCloseHeadChunk() bool { +func (s *memorySeries) maybeCloseHeadChunk() (bool, error) { if s.headChunkClosed { - return false + return false, nil } if time.Now().Sub(s.lastTime.Time()) > headChunkTimeout { s.headChunkClosed = true // Since we cannot modify the head chunk from now on, we // don't need to bother with cloning anymore. s.headChunkUsedByIterator = false - s.head().MaybePopulateLastTime() - return true + return true, s.head().MaybePopulateLastTime() } - return false + return false, nil } // evictChunkDescs evicts chunkDescs if the chunk is evicted. diff --git a/storage/local/storage.go b/storage/local/storage.go index 8c8ca8d2fa..aa0745e985 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -1376,7 +1376,12 @@ func (s *MemorySeriesStorage) maintainMemorySeries( defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc() - if series.maybeCloseHeadChunk() { + closed, err := series.maybeCloseHeadChunk() + if err != nil { + s.quarantineSeries(fp, series.metric, err) + s.persistErrors.Inc() + } + if closed { s.incNumChunksToPersist(1) } From 31e9db7f0cec1719834aeca2cda17e02a7a3b434 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Sat, 4 Feb 2017 22:29:37 +0100 Subject: [PATCH 07/37] storage: Simplify evictChunkDesc method --- storage/local/series.go | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/storage/local/series.go b/storage/local/series.go index bb4ee6bc54..d9c36d0efc 100644 --- a/storage/local/series.go +++ b/storage/local/series.go @@ -277,24 +277,23 @@ func (s *memorySeries) maybeCloseHeadChunk() (bool, error) { return false, nil } -// evictChunkDescs evicts chunkDescs if the chunk is evicted. -// iOldestNotEvicted is the index within the current chunkDescs of the oldest -// chunk that is not evicted. 
-func (s *memorySeries) evictChunkDescs(iOldestNotEvicted int) { - lenToKeep := len(s.chunkDescs) - iOldestNotEvicted - if lenToKeep < len(s.chunkDescs) { - s.savedFirstTime = s.firstTime() - lenEvicted := len(s.chunkDescs) - lenToKeep - s.chunkDescsOffset += lenEvicted - s.persistWatermark -= lenEvicted - chunk.DescOps.WithLabelValues(chunk.Evict).Add(float64(lenEvicted)) - chunk.NumMemDescs.Sub(float64(lenEvicted)) - s.chunkDescs = append( - make([]*chunk.Desc, 0, lenToKeep), - s.chunkDescs[lenEvicted:]..., - ) - s.dirty = true +// evictChunkDescs evicts chunkDescs. lenToEvict is the index within the current +// chunkDescs of the oldest chunk that is not evicted. +func (s *memorySeries) evictChunkDescs(lenToEvict int) { + if lenToEvict < 1 { + return } + lenToKeep := len(s.chunkDescs) - lenToEvict + s.savedFirstTime = s.firstTime() + s.chunkDescsOffset += lenToEvict + s.persistWatermark -= lenToEvict + chunk.DescOps.WithLabelValues(chunk.Evict).Add(float64(lenToEvict)) + chunk.NumMemDescs.Sub(float64(lenToEvict)) + s.chunkDescs = append( + make([]*chunk.Desc, 0, lenToKeep), + s.chunkDescs[lenToEvict:]..., + ) + s.dirty = true } // dropChunks removes chunkDescs older than t. The caller must have locked the From 75282b27ba6ae6a45a49eb78afcf39218372ca84 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Sat, 4 Feb 2017 23:40:22 +0100 Subject: [PATCH 08/37] storage: Added checks for invariants --- storage/local/persistence.go | 3 +++ storage/local/series.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/storage/local/persistence.go b/storage/local/persistence.go index ebaceeaea5..a76d5de8e8 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -706,6 +706,9 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap } } // chunkDescsOffset. + if m.series.chunkDescsOffset < 0 && m.series.persistWatermark > 0 { + panic("encountered unknown chunk desc offset in combination with positive persist watermark") + } if _, err = codable.EncodeVarint(w, int64(m.series.chunkDescsOffset+m.series.persistWatermark)); err != nil { return } diff --git a/storage/local/series.go b/storage/local/series.go index d9c36d0efc..63e4a0e2a5 100644 --- a/storage/local/series.go +++ b/storage/local/series.go @@ -283,6 +283,9 @@ func (s *memorySeries) evictChunkDescs(lenToEvict int) { if lenToEvict < 1 { return } + if s.chunkDescsOffset < 0 { + panic("chunk desc eviction requested with unknown chunk desc offset") + } lenToKeep := len(s.chunkDescs) - lenToEvict s.savedFirstTime = s.firstTime() s.chunkDescsOffset += lenToEvict From 244a65fb293573cad7b80407ff0e14ff96297ec4 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Sun, 5 Feb 2017 02:25:09 +0100 Subject: [PATCH 09/37] storage: Increase persist watermark before calling append The append call may reuse cds, and thus change its len. (In practice, this wouldn't happen as cds should have len==cap. Still, the previous order of lines was problematic.) --- storage/local/series.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/local/series.go b/storage/local/series.go index 63e4a0e2a5..ba8898a596 100644 --- a/storage/local/series.go +++ b/storage/local/series.go @@ -464,9 +464,9 @@ func (s *memorySeries) preloadChunksForRange( fp, s.chunkDescsOffset, len(cds), ) } + s.persistWatermark += len(cds) s.chunkDescs = append(cds, s.chunkDescs...) 
s.chunkDescsOffset = 0 - s.persistWatermark += len(cds) if len(s.chunkDescs) > 0 { firstChunkDescTime = s.chunkDescs[0].FirstTime() } From 2363a90adc48b6408315a1f5a9c850fa33fe997f Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 6 Feb 2017 17:39:59 +0100 Subject: [PATCH 10/37] storage: Do not throw away fully persisted memory series in checkpointing --- storage/local/persistence.go | 70 +++++++++++++++++++++++-------- storage/local/persistence_test.go | 31 ++++++++++++-- 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/storage/local/persistence.go b/storage/local/persistence.go index a76d5de8e8..4a3150c7b1 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -670,12 +670,39 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap defer fpLocker.Unlock(m.fp) chunksToPersist := len(m.series.chunkDescs) - m.series.persistWatermark - if len(m.series.chunkDescs) == 0 || chunksToPersist == 0 { - // This series was completely purged or archived in the meantime or has - // no chunks that need persisting. Ignore. + if len(m.series.chunkDescs) == 0 { + // This series was completely purged or archived + // in the meantime. Ignore. return } realNumberOfSeries++ + + // Sanity checks. + if m.series.chunkDescsOffset < 0 && m.series.persistWatermark > 0 { + panic("encountered unknown chunk desc offset in combination with positive persist watermark") + } + + // These are the values to save in the normal case. + var ( + // persistWatermark is zero as we only checkpoint non-persisted chunks. + persistWatermark int64 + // chunkDescsOffset is shifted by the original persistWatermark for the same reason. + chunkDescsOffset = int64(m.series.chunkDescsOffset + m.series.persistWatermark) + numChunkDescs = int64(chunksToPersist) + ) + // However, in the special case of a series being fully + // persisted but still in memory (i.e. not archived), we + // need to save a "placeholder", for which we use just + // the chunk desc of the last chunk. Values have to be + // adjusted accordingly. (The reason for doing it in + // this weird way is to keep the checkpoint format + // compatible with older versions.) + if chunksToPersist == 0 { + persistWatermark = 1 + chunkDescsOffset-- // Save one chunk desc after all. + numChunkDescs = 1 + } + // seriesFlags left empty in v2. if err = w.WriteByte(0); err != nil { return @@ -691,9 +718,7 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap if _, err = w.Write(buf); err != nil { return } - // persistWatermark. We only checkpoint chunks that need persisting, so - // this is always 0. - if _, err = codable.EncodeVarint(w, 0); err != nil { + if _, err = codable.EncodeVarint(w, persistWatermark); err != nil { return } if m.series.modTime.IsZero() { @@ -705,28 +730,39 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap return } } - // chunkDescsOffset. - if m.series.chunkDescsOffset < 0 && m.series.persistWatermark > 0 { - panic("encountered unknown chunk desc offset in combination with positive persist watermark") - } - if _, err = codable.EncodeVarint(w, int64(m.series.chunkDescsOffset+m.series.persistWatermark)); err != nil { + if _, err = codable.EncodeVarint(w, chunkDescsOffset); err != nil { return } if _, err = codable.EncodeVarint(w, int64(m.series.savedFirstTime)); err != nil { return } - // Number of chunkDescs. 
- if _, err = codable.EncodeVarint(w, int64(chunksToPersist)); err != nil { + if _, err = codable.EncodeVarint(w, numChunkDescs); err != nil { return } - for _, chunkDesc := range m.series.chunkDescs[m.series.persistWatermark:] { - if err = w.WriteByte(byte(chunkDesc.C.Encoding())); err != nil { + if chunksToPersist == 0 { + // Save the one placeholder chunk desc for a fully persisted series. + chunkDesc := m.series.chunkDescs[len(m.series.chunkDescs)-1] + if _, err = codable.EncodeVarint(w, int64(chunkDesc.FirstTime())); err != nil { return } - if err = chunkDesc.C.Marshal(w); err != nil { + lt, err := chunkDesc.LastTime() + if err != nil { return } - p.checkpointChunksWritten.Observe(float64(chunksToPersist)) + if _, err = codable.EncodeVarint(w, int64(lt)); err != nil { + return + } + } else { + // Save (only) the non-persisted chunks. + for _, chunkDesc := range m.series.chunkDescs[m.series.persistWatermark:] { + if err = w.WriteByte(byte(chunkDesc.C.Encoding())); err != nil { + return + } + if err = chunkDesc.C.Marshal(w); err != nil { + return + } + p.checkpointChunksWritten.Observe(float64(chunksToPersist)) + } } // Series is checkpointed now, so declare it clean. In case the entire // checkpoint fails later on, this is fine, as the storage's series diff --git a/storage/local/persistence_test.go b/storage/local/persistence_test.go index e88b17592b..f881dd4e64 100644 --- a/storage/local/persistence_test.go +++ b/storage/local/persistence_test.go @@ -484,7 +484,10 @@ func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunk.Encodin s1.add(model.SamplePair{Timestamp: 1, Value: 3.14}) s3.add(model.SamplePair{Timestamp: 2, Value: 2.7}) s3.headChunkClosed = true - s3.persistWatermark = 1 + // Create another chunk in s3. + s3.add(model.SamplePair{Timestamp: 3, Value: 1.4}) + s3.headChunkClosed = true + s3.persistWatermark = 2 for i := 0; i < 10000; i++ { s4.add(model.SamplePair{ Timestamp: model.Time(i), @@ -512,8 +515,8 @@ func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunk.Encodin if err != nil { t.Fatal(err) } - if loadedSM.length() != 3 { - t.Errorf("want 3 series in map, got %d", loadedSM.length()) + if loadedSM.length() != 4 { + t.Errorf("want 4 series in map, got %d", loadedSM.length()) } if loadedS1, ok := loadedSM.get(m1.FastFingerprint()); ok { if !reflect.DeepEqual(loadedS1.metric, m1) { @@ -537,6 +540,28 @@ func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunk.Encodin } else { t.Errorf("couldn't find %v in loaded map", m1) } + if loadedS3, ok := loadedSM.get(m3.FastFingerprint()); ok { + if !reflect.DeepEqual(loadedS3.metric, m3) { + t.Errorf("want metric %v, got %v", m3, loadedS3.metric) + } + if loadedS3.head().C != nil { + t.Error("head chunk not evicted") + } + if loadedS3.chunkDescsOffset != 1 { + t.Errorf("want chunkDescsOffset 1, got %d", loadedS3.chunkDescsOffset) + } + if !loadedS3.headChunkClosed { + t.Error("headChunkClosed is false") + } + if loadedS3.head().ChunkFirstTime != 3 { + t.Errorf("want ChunkFirstTime in head chunk to be 3, got %d", loadedS3.head().ChunkFirstTime) + } + if loadedS3.head().ChunkLastTime != 3 { + t.Errorf("want ChunkLastTime in head chunk to be 3, got %d", loadedS3.head().ChunkLastTime) + } + } else { + t.Errorf("couldn't find %v in loaded map", m3) + } if loadedS4, ok := loadedSM.get(m4.FastFingerprint()); ok { if !reflect.DeepEqual(loadedS4.metric, m4) { t.Errorf("want metric %v, got %v", m4, loadedS4.metric) From 3a96d0e267032b52bf7b0806869fb633bfeb0627 Mon Sep 17 00:00:00 2001 From: 
Svend Sorensen Date: Mon, 6 Feb 2017 13:28:12 -0800 Subject: [PATCH 11/37] Kubernetes SD: Fix namespace meta label Replace one more instance of `__meta_kubernetes_service_namespace` with `__meta_kubernetes_namespace`. --- documentation/examples/prometheus-kubernetes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/examples/prometheus-kubernetes.yml b/documentation/examples/prometheus-kubernetes.yml index 6d5f7165eb..1f91d36a96 100644 --- a/documentation/examples/prometheus-kubernetes.yml +++ b/documentation/examples/prometheus-kubernetes.yml @@ -146,7 +146,7 @@ scrape_configs: target_label: instance - action: labelmap regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_service_namespace] + - source_labels: [__meta_kubernetes_namespace] target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] target_label: kubernetes_name From eb6b95ac2ee68ed8995679f080b0c2b3301944a1 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 7 Feb 2017 10:55:01 +0100 Subject: [PATCH 12/37] Cut release 1.5.1 Sadly, this is urgently required. --- CHANGELOG.md | 7 +++++++ VERSION | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86b4370b37..e8055b4903 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 1.5.1 / 2017-02-07 + +* [BUGFIX] Don't lose fully persisted memory series during checkpointing. +* [BUGFIX] Fix intermittently failing relabeling. +* [BUGFIX] Make `-storage.local.series-file-shrink-ratio` work. +* [BUGFIX] Remove race condition from TestLoop. + ## 1.5.0 / 2017-01-23 * [CHANGE] Use lexicographic order to sort alerts by name. diff --git a/VERSION b/VERSION index bc80560fad..26ca594609 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.0 +1.5.1 From be8b1eb6564bbf0ad7036ae4d8c8c3ee5884f2d3 Mon Sep 17 00:00:00 2001 From: Mitsuhiro Tanda Date: Wed, 8 Feb 2017 01:33:54 +0900 Subject: [PATCH 13/37] storage: optimize dropping chunks by using minShrinkRatio (#2397) storage: prevent unnecessary chunk header reading if minShrinkRatio > 0 --- storage/local/persistence.go | 45 +++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/storage/local/persistence.go b/storage/local/persistence.go index 4a3150c7b1..79d3edb392 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -19,6 +19,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "path/filepath" "strconv" @@ -918,8 +919,22 @@ func (p *persistence) dropAndPersistChunks( } defer f.Close() + fi, err := f.Stat() + if err != nil { + return + } + totalChunks := int(fi.Size())/chunkLenWithHeader + len(chunks) + + // Calculate chunk index from minShrinkRatio, to skip unnecessary chunk header reading. + chunkIndexToStartSeek := 0 + if p.minShrinkRatio < 1 { + chunkIndexToStartSeek = int(math.Floor(float64(totalChunks) * p.minShrinkRatio)) + } else { + chunkIndexToStartSeek = totalChunks - 1 + } + numDropped = chunkIndexToStartSeek + headerBuf := make([]byte, chunkHeaderLen) - var firstTimeInFile model.Time // Find the first chunk in the file that should be kept. 
for ; ; numDropped++ { _, err = f.Seek(offsetForChunkIndex(numDropped), os.SEEK_SET) @@ -946,11 +961,6 @@ func (p *persistence) dropAndPersistChunks( if err != nil { return } - if numDropped == 0 { - firstTimeInFile = model.Time( - binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]), - ) - } lastTime := model.Time( binary.LittleEndian.Uint64(headerBuf[chunkHeaderLastTimeOffset:]), ) @@ -959,18 +969,21 @@ func (p *persistence) dropAndPersistChunks( } } - // We've found the first chunk that should be kept. - // First check if the shrink ratio is good enough to perform the the - // actual drop or leave it for next time if it is not worth the effort. - fi, err := f.Stat() - if err != nil { - return - } - totalChunks := int(fi.Size())/chunkLenWithHeader + len(chunks) - if numDropped == 0 || float64(numDropped)/float64(totalChunks) < p.minShrinkRatio { + // If numDropped isn't incremented, the minShrinkRatio condition isn't satisfied. + if numDropped == chunkIndexToStartSeek { // Nothing to drop. Just adjust the return values and append the chunks (if any). numDropped = 0 - firstTimeNotDropped = firstTimeInFile + _, err = f.Seek(offsetForChunkIndex(0), os.SEEK_SET) + if err != nil { + return + } + _, err = io.ReadFull(f, headerBuf) + if err != nil { + return + } + firstTimeNotDropped = model.Time( + binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]), + ) if len(chunks) > 0 { offset, err = p.persistChunks(fp, chunks) } From 8c8baaa5589d3e00c7ca700c19b5e344b844aedc Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 8 Feb 2017 16:28:56 +0100 Subject: [PATCH 14/37] storage: writeMemorySeries needs to return true for quarantined series This is another fallout of my bug hunt. --- storage/local/storage.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/storage/local/storage.go b/storage/local/storage.go index aa0745e985..63eb94698f 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -1433,6 +1433,11 @@ func (s *MemorySeriesStorage) maintainMemorySeries( // contains no chunks after dropping old chunks, it is purged entirely. In that // case, the method returns true. // +// If a persist error is encountered, the series is queued for quarantine. In +// that case, the method returns true, too, because the series should not be +// processed anymore (even if it will only be gone for real once quarantining +// has been completed). +// // The caller must have locked the fp. func (s *MemorySeriesStorage) writeMemorySeries( fp model.Fingerprint, series *memorySeries, beforeTime model.Time, @@ -1474,7 +1479,7 @@ func (s *MemorySeriesStorage) writeMemorySeries( var offset int offset, persistErr = s.persistence.persistChunks(fp, chunks) if persistErr != nil { - return false + return true } if series.chunkDescsOffset == -1 { // This is the first chunk persisted for a newly created @@ -1488,10 +1493,10 @@ func (s *MemorySeriesStorage) writeMemorySeries( newFirstTime, offset, numDroppedFromPersistence, allDroppedFromPersistence, persistErr := s.persistence.dropAndPersistChunks(fp, beforeTime, chunks) if persistErr != nil { - return false + return true } if persistErr = series.dropChunks(beforeTime); persistErr != nil { - return false + return true } if len(series.chunkDescs) == 0 && allDroppedFromPersistence { // All chunks dropped from both memory and persistence. Delete the series for good. 
From bed493422472f28ddf8e5ff3f212c4b6860896ca Mon Sep 17 00:00:00 2001
From: beorn7
Date: Thu, 9 Feb 2017 01:53:57 +0100
Subject: [PATCH 15/37] storage: One more persist error code path discovered

Also, in that code path, set chunkDescsOffset to 0 rather than -1 in
case of "dropped more chunks from persistence than from memory" so that
no other weird things happen before the series is quarantined for good.
---
 storage/local/storage.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/storage/local/storage.go b/storage/local/storage.go
index 63eb94698f..25dc30b0a7 100644
--- a/storage/local/storage.go
+++ b/storage/local/storage.go
@@ -1513,7 +1513,8 @@ func (s *MemorySeriesStorage) writeMemorySeries(
 		series.chunkDescsOffset -= numDroppedFromPersistence
 		if series.chunkDescsOffset < 0 {
 			persistErr = errors.New("dropped more chunks from persistence than from memory")
-			series.chunkDescsOffset = -1
+			series.chunkDescsOffset = 0
+			return true
 		}
 	}
 	return false

From 46a0837816681aff55504d5729a05a5ed225df37 Mon Sep 17 00:00:00 2001
From: beorn7
Date: Thu, 9 Feb 2017 14:35:07 +0100
Subject: [PATCH 16/37] storage: Fix offset returned by dropAndPersistChunks

This is another corner-case that was previously never exercised because
the rewriting of a series file was never prevented by the shrink ratio.

Scenario: There is an existing series on disk, which is archived. If a
new sample comes in for that series, a new chunk in memory is created,
and the chunkDescsOffset is set to -1. If series maintenance happens
before the series has at least one chunk to persist _and_ an
insufficient number of chunks on disk is old enough for purging (so
that the shrink ratio kicks in), dropAndPersistChunks would return 0,
but it should return the chunk length of the series file.
---
 storage/local/persistence.go      | 19 ++++++++++++-------
 storage/local/persistence_test.go | 14 ++++++++------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/storage/local/persistence.go b/storage/local/persistence.go
index 4a3150c7b1..f3d840f76a 100644
--- a/storage/local/persistence.go
+++ b/storage/local/persistence.go
@@ -845,12 +845,14 @@ func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, chunksToPersist in
 // dropAndPersistChunks deletes all chunks from a series file whose last sample
 // time is before beforeTime, and then appends the provided chunks, leaving out
 // those whose last sample time is before beforeTime. It returns the timestamp
-// of the first sample in the oldest chunk _not_ dropped, the offset within the
-// series file of the first chunk persisted (out of the provided chunks), the
-// number of deleted chunks, and true if all chunks of the series have been
-// deleted (in which case the returned timestamp will be 0 and must be ignored).
-// It is the caller's responsibility to make sure nothing is persisted or loaded
-// for the same fingerprint concurrently.
+// of the first sample in the oldest chunk _not_ dropped, the chunk offset
+// within the series file of the first chunk persisted (out of the provided
+// chunks, or - if no chunks were provided - the chunk offset where chunks would
+// have been persisted, i.e. the end of the file), the number of deleted chunks,
+// and true if all chunks of the series have been deleted (in which case the
+// returned timestamp will be 0 and must be ignored). It is the caller's
+// responsibility to make sure nothing is persisted or loaded for the same
+// fingerprint concurrently.
 //
 // Returning an error signals problems with the series file.
In this case, the // caller should quarantine the series. @@ -966,13 +968,16 @@ func (p *persistence) dropAndPersistChunks( if err != nil { return } - totalChunks := int(fi.Size())/chunkLenWithHeader + len(chunks) + chunksInFile := int(fi.Size()) / chunkLenWithHeader + totalChunks := chunksInFile + len(chunks) if numDropped == 0 || float64(numDropped)/float64(totalChunks) < p.minShrinkRatio { // Nothing to drop. Just adjust the return values and append the chunks (if any). numDropped = 0 firstTimeNotDropped = firstTimeInFile if len(chunks) > 0 { offset, err = p.persistChunks(fp, chunks) + } else { + offset = chunksInFile } return } diff --git a/storage/local/persistence_test.go b/storage/local/persistence_test.go index f881dd4e64..2814a8f3aa 100644 --- a/storage/local/persistence_test.go +++ b/storage/local/persistence_test.go @@ -173,14 +173,16 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunk.Encoding) { } } - // Try to drop one chunk, which must be prevented by the shrink ratio. + // Try to drop one chunk, which must be prevented by the shrink + // ratio. Since we do not pass in any chunks to persist, the offset + // should be the number of chunks in the file. for fp, _ := range fpToChunks { firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 1, nil) if err != nil { t.Fatal(err) } - if offset != 0 { - t.Errorf("want offset 0, got %d", offset) + if offset != 10 { + t.Errorf("want offset 10, got %d", offset) } if firstTime != 0 { t.Errorf("want first time 0, got %d", firstTime) @@ -422,14 +424,14 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunk.Encoding) { t.Error("all chunks dropped") } } - // Drop only the first two chunks should not happen, either. + // Drop only the first two chunks should not happen, either. Chunks in file is now 9. for fp := range fpToChunks { firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 2, nil) if err != nil { t.Fatal(err) } - if offset != 0 { - t.Errorf("want offset 0, got %d", offset) + if offset != 9 { + t.Errorf("want offset 9, got %d", offset) } if firstTime != 0 { t.Errorf("want first time 0, got %d", firstTime) From d771185a43face7bcc88cda0ad6cb96e1bdfcd15 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 10 Feb 2017 11:42:59 +0100 Subject: [PATCH 17/37] storage: Fix chunkIndexToStartSeek calculation With a high enough shrink ratio and enough chunks to persist, the cutoff point could be _outside_ of the file, which wreaks havoc in the storage. 
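To make the failure mode concrete, here is a worked example with assumed numbers, using the variable names from `dropAndPersistChunks` as changed in the diff below:

```
package main

import (
	"fmt"
	"math"
)

func main() {
	chunksInFile := 10     // chunks already in the series file
	chunksToPersist := 30  // len(chunks) handed to dropAndPersistChunks
	minShrinkRatio := 0.75 // a high -storage.local.series-file-shrink-ratio
	totalChunks := chunksInFile + chunksToPersist

	// Before the fix: the seek index is derived from totalChunks, which
	// includes chunks that are not in the file yet.
	chunkIndexToStartSeek := int(math.Floor(float64(totalChunks) * minShrinkRatio))
	fmt.Println(chunkIndexToStartSeek) // 30, but the file only holds chunks 0..9

	// The fix clamps the index so the seek stays inside the file.
	if chunkIndexToStartSeek >= chunksInFile {
		chunkIndexToStartSeek = chunksInFile - 1
	}
	fmt.Println(chunkIndexToStartSeek) // 9
}
```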
--- storage/local/persistence.go | 5 ++- storage/local/persistence_test.go | 75 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/storage/local/persistence.go b/storage/local/persistence.go index 43b3f57fd4..7cb0c107d7 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -932,8 +932,9 @@ func (p *persistence) dropAndPersistChunks( chunkIndexToStartSeek := 0 if p.minShrinkRatio < 1 { chunkIndexToStartSeek = int(math.Floor(float64(totalChunks) * p.minShrinkRatio)) - } else { - chunkIndexToStartSeek = totalChunks - 1 + } + if chunkIndexToStartSeek >= chunksInFile { + chunkIndexToStartSeek = chunksInFile - 1 } numDropped = chunkIndexToStartSeek diff --git a/storage/local/persistence_test.go b/storage/local/persistence_test.go index 2814a8f3aa..31c82eecef 100644 --- a/storage/local/persistence_test.go +++ b/storage/local/persistence_test.go @@ -462,6 +462,81 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunk.Encoding) { t.Error("all chunks dropped") } } + // Drop all the chunks again. + for fp := range fpToChunks { + firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 100, nil) + if firstTime != 0 { + t.Errorf("want first time 0, got %d", firstTime) + } + if err != nil { + t.Fatal(err) + } + if offset != 0 { + t.Errorf("want offset 0, got %d", offset) + } + if numDropped != 7 { + t.Errorf("want 7 dropped chunks, got %v", numDropped) + } + if !allDropped { + t.Error("not all chunks dropped") + } + } + // Re-add first two of the chunks again. + for fp, chunks := range fpToChunks { + firstTimeNotDropped, offset, numDropped, allDropped, err := + p.dropAndPersistChunks(fp, model.Earliest, chunks[:2]) + if err != nil { + t.Fatal(err) + } + if got, want := firstTimeNotDropped, model.Time(0); got != want { + t.Errorf("Want firstTimeNotDropped %v, got %v.", got, want) + } + if got, want := offset, 0; got != want { + t.Errorf("Want offset %v, got %v.", got, want) + } + if got, want := numDropped, 0; got != want { + t.Errorf("Want numDropped %v, got %v.", got, want) + } + if allDropped { + t.Error("All dropped.") + } + } + // Try to drop the first of the chunks while adding eight more. The drop + // should not happen because of the shrink ratio. Also, this time the + // minimum cut-off point is within the added chunks and not in the file + // anymore. + for fp, chunks := range fpToChunks { + firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 1, chunks[2:]) + if err != nil { + t.Fatal(err) + } + if offset != 2 { + t.Errorf("want offset 2, got %d", offset) + } + if firstTime != 0 { + t.Errorf("want first time 0, got %d", firstTime) + } + if numDropped != 0 { + t.Errorf("want 0 dropped chunk, got %v", numDropped) + } + if allDropped { + t.Error("all chunks dropped") + } + wantChunks := chunks + indexes := make([]int, len(wantChunks)) + for i := range indexes { + indexes[i] = i + } + gotChunks, err := p.loadChunks(fp, indexes, 0) + if err != nil { + t.Fatal(err) + } + for i, wantChunk := range wantChunks { + if !chunksEqual(wantChunk, gotChunks[i]) { + t.Errorf("%d. 
Chunks not equal.", i) + } + } + } } func TestPersistLoadDropChunksType0(t *testing.T) { From abf1d6e413e94ebde06c181c262d6dab2d2aad3e Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 10 Feb 2017 12:44:57 +0100 Subject: [PATCH 18/37] storage: Fix typo in doc comment --- storage/local/persistence.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/local/persistence.go b/storage/local/persistence.go index f3d840f76a..efb65c4cb5 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -962,7 +962,7 @@ func (p *persistence) dropAndPersistChunks( } // We've found the first chunk that should be kept. - // First check if the shrink ratio is good enough to perform the the + // First check if the shrink ratio is good enough to perform the // actual drop or leave it for next time if it is not worth the effort. fi, err := f.Stat() if err != nil { From d3297e06863728de1507bc18fbcd73b6eec84e7a Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 10 Feb 2017 13:05:32 +0100 Subject: [PATCH 19/37] build: Use Go1.7.4 in Travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3612d19dd7..78c382486f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ sudo: false language: go go: -- 1.7.3 +- 1.7.4 go_import_path: github.com/prometheus/prometheus From 6c17d3068896a32e6db0c95ff8292c098ed97403 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 10 Feb 2017 13:16:17 +0100 Subject: [PATCH 20/37] Cut v1.5.2 --- CHANGELOG.md | 8 ++++++++ VERSION | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8055b4903..9149f3a818 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 1.5.2 / 2017-02-10 + +* [BUGFIX] Fix series corruption in a special case of series maintenance where + the minimum series-file-shrink-ratio kicks in. +* [BUGFIX] Fix two panic conditions both related to processing a series + scheduled to be quarantined. +* [ENHANCEMENT] Binaries built with Go1.7.5. + ## 1.5.1 / 2017-02-07 * [BUGFIX] Don't lose fully persisted memory series during checkpointing. 
diff --git a/VERSION b/VERSION
index 26ca594609..4cda8f19ed 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.5.1
+1.5.2

From 18cd7246b5e70a69fdd84ce08e15130af959d1ea Mon Sep 17 00:00:00 2001
From: Alex Somesan
Date: Mon, 13 Feb 2017 08:45:00 -0800
Subject: [PATCH 21/37] Instrument query engine timings (#2418)

* Instrument query engine statistics
---
 promql/engine.go | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/promql/engine.go b/promql/engine.go
index ae50acb864..72f019e00b 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -54,11 +54,51 @@ var (
 		Name: "queries_concurrent_max",
 		Help: "The max number of concurrent queries.",
 	})
+	queryPrepareTime = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Namespace:   namespace,
+			Subsystem:   subsystem,
+			Name:        "query_duration_seconds",
+			Help:        "Query timings",
+			ConstLabels: prometheus.Labels{"slice": "prepare_time"},
+		},
+	)
+	queryInnerEval = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Namespace:   namespace,
+			Subsystem:   subsystem,
+			Name:        "query_duration_seconds",
+			Help:        "Query timings",
+			ConstLabels: prometheus.Labels{"slice": "inner_eval"},
+		},
+	)
+	queryResultAppend = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Namespace:   namespace,
+			Subsystem:   subsystem,
+			Name:        "query_duration_seconds",
+			Help:        "Query timings",
+			ConstLabels: prometheus.Labels{"slice": "result_append"},
+		},
+	)
+	queryResultSort = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Namespace:   namespace,
+			Subsystem:   subsystem,
+			Name:        "query_duration_seconds",
+			Help:        "Query timings",
+			ConstLabels: prometheus.Labels{"slice": "result_sort"},
+		},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(currentQueries)
 	prometheus.MustRegister(maxConcurrentQueries)
+	prometheus.MustRegister(queryPrepareTime)
+	prometheus.MustRegister(queryInnerEval)
+	prometheus.MustRegister(queryResultAppend)
+	prometheus.MustRegister(queryResultSort)
 }
 
 // convertibleToInt64 returns true if v does not over-/underflow an int64.
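The four summaries added above deliberately share one metric family, `query_duration_seconds`, and are distinguished only by the constant `slice` label, rather than by four differently named metrics. A minimal sketch of that pattern follows; the `newQueryTimer` helper is illustrative and not part of the patch, and the namespace/subsystem values are assumed to match the package-level constants in engine.go:

```
// Hypothetical helper: one metric family, several collectors that differ
// only in a constant label value. Registration accepts all of them because
// the fully qualified name, help string, and label dimensions agree.
const namespace, subsystem = "prometheus", "engine" // assumed values

func newQueryTimer(slice string) prometheus.Summary {
	return prometheus.NewSummary(prometheus.SummaryOpts{
		Namespace:   namespace,
		Subsystem:   subsystem,
		Name:        "query_duration_seconds",
		Help:        "Query timings",
		ConstLabels: prometheus.Labels{"slice": slice},
	})
}
```

With such a helper, the block above would collapse to `queryPrepareTime = newQueryTimer("prepare_time")` and so on.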
@@ -404,6 +444,8 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( prepareTimer := query.stats.GetTimer(stats.QueryPreparationTime).Start() err = ng.populateIterators(ctx, querier, s) prepareTimer.Stop() + queryPrepareTime.Observe(prepareTimer.ElapsedTime().Seconds()) + if err != nil { return nil, err } @@ -431,6 +473,8 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( } evalTimer.Stop() + queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) + return val, nil } numSteps := int(s.End.Sub(s.Start) / s.Interval) @@ -486,6 +530,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( } } evalTimer.Stop() + queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) if err := contextDone(ctx, "expression evaluation"); err != nil { return nil, err @@ -497,6 +542,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( mat = append(mat, ss) } appendTimer.Stop() + queryResultAppend.Observe(appendTimer.ElapsedTime().Seconds()) if err := contextDone(ctx, "expression evaluation"); err != nil { return nil, err @@ -508,7 +554,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( sortTimer := query.stats.GetTimer(stats.ResultSortTime).Start() sort.Sort(resMatrix) sortTimer.Stop() - + queryResultSort.Observe(sortTimer.ElapsedTime().Seconds()) return resMatrix, nil } From e7ce10d4e563c5a36b17330ce3135db167d9d4fc Mon Sep 17 00:00:00 2001 From: James Hartig Date: Mon, 13 Feb 2017 15:40:45 -0500 Subject: [PATCH 22/37] vendoring: Update github.com/miekg/dns This is needed to get the NameList function to get query names --- vendor/github.com/miekg/dns/README.md | 5 +- vendor/github.com/miekg/dns/client.go | 14 ++- vendor/github.com/miekg/dns/clientconfig.go | 32 ++++++ vendor/github.com/miekg/dns/dane.go | 9 +- vendor/github.com/miekg/dns/defaults.go | 4 +- vendor/github.com/miekg/dns/dnssec.go | 11 +- vendor/github.com/miekg/dns/dnssec_keygen.go | 16 +-- vendor/github.com/miekg/dns/edns.go | 83 +++++++++++--- vendor/github.com/miekg/dns/msg.go | 82 +++++++------- vendor/github.com/miekg/dns/msg_generate.go | 4 +- vendor/github.com/miekg/dns/msg_helpers.go | 2 - vendor/github.com/miekg/dns/nsecx.go | 11 +- vendor/github.com/miekg/dns/scan_rr.go | 112 +++++++++---------- vendor/github.com/miekg/dns/server.go | 6 +- vendor/github.com/miekg/dns/sig0.go | 5 +- vendor/github.com/miekg/dns/tsig.go | 3 +- vendor/github.com/miekg/dns/types.go | 19 +--- vendor/github.com/miekg/dns/udp.go | 26 +---- vendor/github.com/miekg/dns/udp_linux.go | 34 +++++- vendor/github.com/miekg/dns/udp_other.go | 17 --- vendor/github.com/miekg/dns/udp_plan9.go | 34 ------ vendor/github.com/miekg/dns/udp_windows.go | 9 +- vendor/github.com/miekg/dns/zmsg.go | 34 +++--- vendor/vendor.json | 6 +- 24 files changed, 301 insertions(+), 277 deletions(-) delete mode 100644 vendor/github.com/miekg/dns/udp_other.go delete mode 100644 vendor/github.com/miekg/dns/udp_plan9.go diff --git a/vendor/github.com/miekg/dns/README.md b/vendor/github.com/miekg/dns/README.md index 0e3356cb90..0c1f1b6a9c 100644 --- a/vendor/github.com/miekg/dns/README.md +++ b/vendor/github.com/miekg/dns/README.md @@ -1,4 +1,5 @@ -[![Build Status](https://travis-ci.org/miekg/dns.svg?branch=master)](https://travis-ci.org/miekg/dns) [![](https://godoc.org/github.com/miekg/dns?status.svg)](https://godoc.org/github.com/miekg/dns) +[![Build 
Status](https://travis-ci.org/miekg/dns.svg?branch=master)](https://travis-ci.org/miekg/dns)
+[![](https://godoc.org/github.com/miekg/dns?status.svg)](https://godoc.org/github.com/miekg/dns)
 
 # Alternative (more granular) approach to a DNS library
 
@@ -12,7 +13,7 @@ can build servers and resolvers with it.
 
 We try to keep the "master" branch as sane as possible and at the bleeding
 edge of standards, avoiding breaking changes wherever reasonable. We support the last
-two versions of Go, currently: 1.5 and 1.6.
+two versions of Go, currently: 1.6 and 1.7.
 
 # Goals
diff --git a/vendor/github.com/miekg/dns/client.go b/vendor/github.com/miekg/dns/client.go
index 0db7f7bf68..8b09e189e3 100644
--- a/vendor/github.com/miekg/dns/client.go
+++ b/vendor/github.com/miekg/dns/client.go
@@ -300,6 +300,18 @@ func tcpMsgLen(t io.Reader) (int, error) {
 	if err != nil {
 		return 0, err
 	}
+
+	// As seen with my local router/switch, the above read can return just
+	// one byte, resulting in a ShortRead. Instead of looping, read the
+	// second byte directly.
+	if n == 1 {
+		n1, err := t.Read(p[1:])
+		if err != nil {
+			return 0, err
+		}
+		n += n1
+	}
+
 	if n != 2 {
 		return 0, ErrShortRead
 	}
@@ -400,7 +412,7 @@ func (co *Conn) Write(p []byte) (n int, err error) {
 		n, err := io.Copy(w, bytes.NewReader(p))
 		return int(n), err
 	}
-	n, err = co.Conn.(*net.UDPConn).Write(p)
+	n, err = co.Conn.Write(p)
 	return n, err
 }
diff --git a/vendor/github.com/miekg/dns/clientconfig.go b/vendor/github.com/miekg/dns/clientconfig.go
index cfa9ad0b22..0a1f5a92c5 100644
--- a/vendor/github.com/miekg/dns/clientconfig.go
+++ b/vendor/github.com/miekg/dns/clientconfig.go
@@ -97,3 +97,35 @@ func ClientConfigFromFile(resolvconf string) (*ClientConfig, error) {
 	}
 	return c, nil
 }
+
+// NameList returns all of the names that should be queried based on the
+// config. It is based off of go's net/dns name building, but it does not
+// check the length of the resulting names.
+func (c *ClientConfig) NameList(name string) []string {
+	// if this domain is already fully qualified, no append needed.
+	if IsFqdn(name) {
+		return []string{name}
+	}
+
+	// Check to see if the name has more labels than Ndots. Do this before making
+	// the domain fully qualified.
+	hasNdots := CountLabel(name) > c.Ndots
+	// Make the domain fully qualified.
+	name = Fqdn(name)
+
+	// Make a list of names based off search.
+	names := []string{}
+
+	// If name has enough dots, try that first.
+	if hasNdots {
+		names = append(names, name)
+	}
+	for _, s := range c.Search {
+		names = append(names, Fqdn(name+s))
+	}
+	// If we didn't have enough dots, try after suffixes.
+	if !hasNdots {
+		names = append(names, name)
+	}
+	return names
+}
diff --git a/vendor/github.com/miekg/dns/dane.go b/vendor/github.com/miekg/dns/dane.go
index cdaa833ff2..8c4a14ef19 100644
--- a/vendor/github.com/miekg/dns/dane.go
+++ b/vendor/github.com/miekg/dns/dane.go
@@ -6,7 +6,6 @@ import (
 	"crypto/x509"
 	"encoding/hex"
 	"errors"
-	"io"
 )
 
 // CertificateToDANE converts a certificate to a hex string as used in the TLSA or SMIMEA records.
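Stepping back to the `NameList` addition in clientconfig.go above, which the commit message names as the reason for this vendor update: a hedged usage sketch with assumed search-path values shows the ordering rules that function implements:

```
package main

import (
	"fmt"

	"github.com/miekg/dns"
)

func main() {
	cfg := &dns.ClientConfig{
		Search: []string{"svc.cluster.local", "cluster.local"},
		Ndots:  1, // the resolv.conf default
	}

	// One label does not exceed Ndots, so search suffixes are tried first
	// and the bare name comes last:
	fmt.Println(cfg.NameList("prometheus"))
	// [prometheus.svc.cluster.local. prometheus.cluster.local. prometheus.]

	// Already fully qualified, so it is returned unchanged:
	fmt.Println(cfg.NameList("prometheus.io."))
	// [prometheus.io.]
}
```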
@@ -23,20 +22,20 @@ func CertificateToDANE(selector, matchingType uint8, cert *x509.Certificate) (st h := sha256.New() switch selector { case 0: - io.WriteString(h, string(cert.Raw)) + h.Write(cert.Raw) return hex.EncodeToString(h.Sum(nil)), nil case 1: - io.WriteString(h, string(cert.RawSubjectPublicKeyInfo)) + h.Write(cert.RawSubjectPublicKeyInfo) return hex.EncodeToString(h.Sum(nil)), nil } case 2: h := sha512.New() switch selector { case 0: - io.WriteString(h, string(cert.Raw)) + h.Write(cert.Raw) return hex.EncodeToString(h.Sum(nil)), nil case 1: - io.WriteString(h, string(cert.RawSubjectPublicKeyInfo)) + h.Write(cert.RawSubjectPublicKeyInfo) return hex.EncodeToString(h.Sum(nil)), nil } } diff --git a/vendor/github.com/miekg/dns/defaults.go b/vendor/github.com/miekg/dns/defaults.go index cf456165f4..3308ec838b 100644 --- a/vendor/github.com/miekg/dns/defaults.go +++ b/vendor/github.com/miekg/dns/defaults.go @@ -102,11 +102,11 @@ func (dns *Msg) SetAxfr(z string) *Msg { // SetTsig appends a TSIG RR to the message. // This is only a skeleton TSIG RR that is added as the last RR in the // additional section. The Tsig is calculated when the message is being send. -func (dns *Msg) SetTsig(z, algo string, fudge, timesigned int64) *Msg { +func (dns *Msg) SetTsig(z, algo string, fudge uint16, timesigned int64) *Msg { t := new(TSIG) t.Hdr = RR_Header{z, TypeTSIG, ClassANY, 0, 0} t.Algorithm = algo - t.Fudge = 300 + t.Fudge = fudge t.TimeSigned = uint64(timesigned) t.OrigId = dns.Id dns.Extra = append(dns.Extra, t) diff --git a/vendor/github.com/miekg/dns/dnssec.go b/vendor/github.com/miekg/dns/dnssec.go index f5f3fbdd89..9e196859ce 100644 --- a/vendor/github.com/miekg/dns/dnssec.go +++ b/vendor/github.com/miekg/dns/dnssec.go @@ -208,9 +208,6 @@ func (k *DNSKEY) ToDS(h uint8) *DS { // "|" denotes concatenation // DNSKEY RDATA = Flags | Protocol | Algorithm | Public Key. - // digest buffer - digest := append(owner, wire...) // another copy - var hash crypto.Hash switch h { case SHA1: @@ -226,7 +223,8 @@ func (k *DNSKEY) ToDS(h uint8) *DS { } s := hash.New() - s.Write(digest) + s.Write(owner) + s.Write(wire) ds.Digest = hex.EncodeToString(s.Sum(nil)) return ds } @@ -297,7 +295,6 @@ func (rr *RRSIG) Sign(k crypto.Signer, rrset []RR) error { if err != nil { return err } - signdata = append(signdata, wire...) hash, ok := AlgorithmToHash[rr.Algorithm] if !ok { @@ -306,6 +303,7 @@ func (rr *RRSIG) Sign(k crypto.Signer, rrset []RR) error { h := hash.New() h.Write(signdata) + h.Write(wire) signature, err := sign(k, h.Sum(nil), hash, rr.Algorithm) if err != nil { @@ -415,7 +413,6 @@ func (rr *RRSIG) Verify(k *DNSKEY, rrset []RR) error { if err != nil { return err } - signeddata = append(signeddata, wire...) 
sigbuf := rr.sigBuf() // Get the binary signature data if rr.Algorithm == PRIVATEDNS { // PRIVATEOID @@ -438,6 +435,7 @@ func (rr *RRSIG) Verify(k *DNSKEY, rrset []RR) error { h := hash.New() h.Write(signeddata) + h.Write(wire) return rsa.VerifyPKCS1v15(pubkey, hash, h.Sum(nil), sigbuf) case ECDSAP256SHA256, ECDSAP384SHA384: @@ -452,6 +450,7 @@ func (rr *RRSIG) Verify(k *DNSKEY, rrset []RR) error { h := hash.New() h.Write(signeddata) + h.Write(wire) if ecdsa.Verify(pubkey, h.Sum(nil), r, s) { return nil } diff --git a/vendor/github.com/miekg/dns/dnssec_keygen.go b/vendor/github.com/miekg/dns/dnssec_keygen.go index 229a079370..5e4b7741a6 100644 --- a/vendor/github.com/miekg/dns/dnssec_keygen.go +++ b/vendor/github.com/miekg/dns/dnssec_keygen.go @@ -121,17 +121,17 @@ func (k *DNSKEY) setPublicKeyDSA(_Q, _P, _G, _Y *big.Int) bool { // RFC 3110: Section 2. RSA Public KEY Resource Records func exponentToBuf(_E int) []byte { var buf []byte - i := big.NewInt(int64(_E)) - if len(i.Bytes()) < 256 { - buf = make([]byte, 1) - buf[0] = uint8(len(i.Bytes())) + i := big.NewInt(int64(_E)).Bytes() + if len(i) < 256 { + buf = make([]byte, 1, 1+len(i)) + buf[0] = uint8(len(i)) } else { - buf = make([]byte, 3) + buf = make([]byte, 3, 3+len(i)) buf[0] = 0 - buf[1] = uint8(len(i.Bytes()) >> 8) - buf[2] = uint8(len(i.Bytes())) + buf[1] = uint8(len(i) >> 8) + buf[2] = uint8(len(i)) } - buf = append(buf, i.Bytes()...) + buf = append(buf, i...) return buf } diff --git a/vendor/github.com/miekg/dns/edns.go b/vendor/github.com/miekg/dns/edns.go index 0550aaa399..465b85f37f 100644 --- a/vendor/github.com/miekg/dns/edns.go +++ b/vendor/github.com/miekg/dns/edns.go @@ -4,25 +4,27 @@ import ( "encoding/binary" "encoding/hex" "errors" + "fmt" "net" "strconv" ) // EDNS0 Option codes. const ( - EDNS0LLQ = 0x1 // long lived queries: http://tools.ietf.org/html/draft-sekar-dns-llq-01 - EDNS0UL = 0x2 // update lease draft: http://files.dns-sd.org/draft-sekar-dns-ul.txt - EDNS0NSID = 0x3 // nsid (RFC5001) - EDNS0DAU = 0x5 // DNSSEC Algorithm Understood - EDNS0DHU = 0x6 // DS Hash Understood - EDNS0N3U = 0x7 // NSEC3 Hash Understood - EDNS0SUBNET = 0x8 // client-subnet (RFC6891) - EDNS0EXPIRE = 0x9 // EDNS0 expire - EDNS0COOKIE = 0xa // EDNS0 Cookie - EDNS0SUBNETDRAFT = 0x50fa // Don't use! Use EDNS0SUBNET - EDNS0LOCALSTART = 0xFDE9 // Beginning of range reserved for local/experimental use (RFC6891) - EDNS0LOCALEND = 0xFFFE // End of range reserved for local/experimental use (RFC6891) - _DO = 1 << 15 // dnssec ok + EDNS0LLQ = 0x1 // long lived queries: http://tools.ietf.org/html/draft-sekar-dns-llq-01 + EDNS0UL = 0x2 // update lease draft: http://files.dns-sd.org/draft-sekar-dns-ul.txt + EDNS0NSID = 0x3 // nsid (RFC5001) + EDNS0DAU = 0x5 // DNSSEC Algorithm Understood + EDNS0DHU = 0x6 // DS Hash Understood + EDNS0N3U = 0x7 // NSEC3 Hash Understood + EDNS0SUBNET = 0x8 // client-subnet (RFC6891) + EDNS0EXPIRE = 0x9 // EDNS0 expire + EDNS0COOKIE = 0xa // EDNS0 Cookie + EDNS0TCPKEEPALIVE = 0xb // EDNS0 tcp keep alive (RFC7828) + EDNS0SUBNETDRAFT = 0x50fa // Don't use! Use EDNS0SUBNET + EDNS0LOCALSTART = 0xFDE9 // Beginning of range reserved for local/experimental use (RFC6891) + EDNS0LOCALEND = 0xFFFE // End of range reserved for local/experimental use (RFC6891) + _DO = 1 << 15 // dnssec ok ) // OPT is the EDNS0 RR appended to messages to convey extra (meta) information. 
@@ -195,7 +197,7 @@ func (e *EDNS0_NSID) String() string { return string(e.Nsid) } // e := new(dns.EDNS0_SUBNET) // e.Code = dns.EDNS0SUBNET // e.Family = 1 // 1 for IPv4 source address, 2 for IPv6 -// e.NetMask = 32 // 32 for IPV4, 128 for IPv6 +// e.SourceNetMask = 32 // 32 for IPV4, 128 for IPv6 // e.SourceScope = 0 // e.Address = net.ParseIP("127.0.0.1").To4() // for IPv4 // // e.Address = net.ParseIP("2001:7b8:32a::2") // for IPV6 @@ -540,3 +542,56 @@ func (e *EDNS0_LOCAL) unpack(b []byte) error { } return nil } + +type EDNS0_TCP_KEEPALIVE struct { + Code uint16 // Always EDNSTCPKEEPALIVE + Length uint16 // the value 0 if the TIMEOUT is omitted, the value 2 if it is present; + Timeout uint16 // an idle timeout value for the TCP connection, specified in units of 100 milliseconds, encoded in network byte order. +} + +func (e *EDNS0_TCP_KEEPALIVE) Option() uint16 { + return EDNS0TCPKEEPALIVE +} + +func (e *EDNS0_TCP_KEEPALIVE) pack() ([]byte, error) { + if e.Timeout != 0 && e.Length != 2 { + return nil, errors.New("dns: timeout specified but length is not 2") + } + if e.Timeout == 0 && e.Length != 0 { + return nil, errors.New("dns: timeout not specified but length is not 0") + } + b := make([]byte, 4+e.Length) + binary.BigEndian.PutUint16(b[0:], e.Code) + binary.BigEndian.PutUint16(b[2:], e.Length) + if e.Length == 2 { + binary.BigEndian.PutUint16(b[4:], e.Timeout) + } + return b, nil +} + +func (e *EDNS0_TCP_KEEPALIVE) unpack(b []byte) error { + if len(b) < 4 { + return ErrBuf + } + e.Length = binary.BigEndian.Uint16(b[2:4]) + if e.Length != 0 && e.Length != 2 { + return errors.New("dns: length mismatch, want 0/2 but got " + strconv.FormatUint(uint64(e.Length), 10)) + } + if e.Length == 2 { + if len(b) < 6 { + return ErrBuf + } + e.Timeout = binary.BigEndian.Uint16(b[4:6]) + } + return nil +} + +func (e *EDNS0_TCP_KEEPALIVE) String() (s string) { + s = "use tcp keep-alive" + if e.Length == 0 { + s += ", timeout omitted" + } else { + s += fmt.Sprintf(", timeout %dms", e.Timeout*100) + } + return +} diff --git a/vendor/github.com/miekg/dns/msg.go b/vendor/github.com/miekg/dns/msg.go index a9acd1e9fe..c74e724fd0 100644 --- a/vendor/github.com/miekg/dns/msg.go +++ b/vendor/github.com/miekg/dns/msg.go @@ -16,22 +16,9 @@ import ( "math/big" "math/rand" "strconv" + "sync" ) -func init() { - // Initialize default math/rand source using crypto/rand to provide better - // security without the performance trade-off. - buf := make([]byte, 8) - _, err := crand.Read(buf) - if err != nil { - // Failed to read from cryptographic source, fallback to default initial - // seed (1) by returning early - return - } - seed := binary.BigEndian.Uint64(buf) - rand.Seed(int64(seed)) -} - const maxCompressionOffset = 2 << 13 // We have 14 bits for the compression pointer var ( @@ -66,11 +53,45 @@ var ( // dns.Id = func() uint16 { return 3 } var Id func() uint16 = id +var ( + idLock sync.Mutex + idRand *rand.Rand +) + // id returns a 16 bits random number to be used as a // message id. The random provided should be good enough. func id() uint16 { - id32 := rand.Uint32() - return uint16(id32) + idLock.Lock() + + if idRand == nil { + // This (partially) works around + // https://github.com/golang/go/issues/11833 by only + // seeding idRand upon the first call to id. 
+ + var seed int64 + var buf [8]byte + + if _, err := crand.Read(buf[:]); err == nil { + seed = int64(binary.LittleEndian.Uint64(buf[:])) + } else { + seed = rand.Int63() + } + + idRand = rand.New(rand.NewSource(seed)) + } + + // The call to idRand.Uint32 must be within the + // mutex lock because *rand.Rand is not safe for + // concurrent use. + // + // There is no added performance overhead to calling + // idRand.Uint32 inside a mutex lock over just + // calling rand.Uint32 as the global math/rand rng + // is internally protected by a sync.Mutex. + id := uint16(idRand.Uint32()) + + idLock.Unlock() + return id } // MsgHdr is a a manually-unpacked version of (id, bits). @@ -203,12 +224,6 @@ func packDomainName(s string, msg []byte, off int, compression map[string]int, c bs[j] = bs[j+2] } ls -= 2 - } else if bs[i] == 't' { - bs[i] = '\t' - } else if bs[i] == 'r' { - bs[i] = '\r' - } else if bs[i] == 'n' { - bs[i] = '\n' } escapedDot = bs[i] == '.' bsFresh = false @@ -335,10 +350,6 @@ Loop: fallthrough case '"', '\\': s = append(s, '\\', b) - case '\t': - s = append(s, '\\', 't') - case '\r': - s = append(s, '\\', 'r') default: if b < 32 || b >= 127 { // unprintable use \DDD var buf [3]byte @@ -431,12 +442,6 @@ func packTxtString(s string, msg []byte, offset int, tmp []byte) (int, error) { if i+2 < len(bs) && isDigit(bs[i]) && isDigit(bs[i+1]) && isDigit(bs[i+2]) { msg[offset] = dddToByte(bs[i:]) i += 2 - } else if bs[i] == 't' { - msg[offset] = '\t' - } else if bs[i] == 'r' { - msg[offset] = '\r' - } else if bs[i] == 'n' { - msg[offset] = '\n' } else { msg[offset] = bs[i] } @@ -508,12 +513,6 @@ func unpackTxtString(msg []byte, offset int) (string, int, error) { switch b { case '"', '\\': s = append(s, '\\', b) - case '\t': - s = append(s, `\t`...) - case '\r': - s = append(s, `\r`...) - case '\n': - s = append(s, `\n`...) 
default: if b < 32 || b > 127 { // unprintable var buf [3]byte @@ -781,9 +780,6 @@ func (dns *Msg) Unpack(msg []byte) (err error) { if dh, off, err = unpackMsgHdr(msg, off); err != nil { return err } - if off == len(msg) { - return ErrTruncated - } dns.Id = dh.Id dns.Response = (dh.Bits & _QR) != 0 @@ -797,6 +793,10 @@ func (dns *Msg) Unpack(msg []byte) (err error) { dns.CheckingDisabled = (dh.Bits & _CD) != 0 dns.Rcode = int(dh.Bits & 0xF) + if off == len(msg) { + return ErrTruncated + } + // Optimistically use the count given to us in the header dns.Question = make([]Question, 0, int(dh.Qdcount)) diff --git a/vendor/github.com/miekg/dns/msg_generate.go b/vendor/github.com/miekg/dns/msg_generate.go index 35786f22cd..c29447a100 100644 --- a/vendor/github.com/miekg/dns/msg_generate.go +++ b/vendor/github.com/miekg/dns/msg_generate.go @@ -117,9 +117,9 @@ return off, err switch { case st.Tag(i) == `dns:"-"`: // ignored case st.Tag(i) == `dns:"cdomain-name"`: - fallthrough - case st.Tag(i) == `dns:"domain-name"`: o("off, err = PackDomainName(rr.%s, msg, off, compression, compress)\n") + case st.Tag(i) == `dns:"domain-name"`: + o("off, err = PackDomainName(rr.%s, msg, off, compression, false)\n") case st.Tag(i) == `dns:"a"`: o("off, err = packDataA(rr.%s, msg, off)\n") case st.Tag(i) == `dns:"aaaa"`: diff --git a/vendor/github.com/miekg/dns/msg_helpers.go b/vendor/github.com/miekg/dns/msg_helpers.go index e7a9500cc0..494c053771 100644 --- a/vendor/github.com/miekg/dns/msg_helpers.go +++ b/vendor/github.com/miekg/dns/msg_helpers.go @@ -263,8 +263,6 @@ func unpackString(msg []byte, off int) (string, int, error) { switch b { case '"', '\\': s = append(s, '\\', b) - case '\t', '\r', '\n': - s = append(s, b) default: if b < 32 || b > 127 { // unprintable var buf [3]byte diff --git a/vendor/github.com/miekg/dns/nsecx.go b/vendor/github.com/miekg/dns/nsecx.go index 6f10f3e65b..51ce7f8b17 100644 --- a/vendor/github.com/miekg/dns/nsecx.go +++ b/vendor/github.com/miekg/dns/nsecx.go @@ -3,7 +3,6 @@ package dns import ( "crypto/sha1" "hash" - "io" "strings" ) @@ -36,15 +35,15 @@ func HashName(label string, ha uint8, iter uint16, salt string) string { } // k = 0 - name = append(name, wire...) - io.WriteString(s, string(name)) + s.Write(name) + s.Write(wire) nsec3 := s.Sum(nil) // k > 0 for k := uint16(0); k < iter; k++ { s.Reset() - nsec3 = append(nsec3, wire...) 
- io.WriteString(s, string(nsec3)) - nsec3 = s.Sum(nil) + s.Write(nsec3) + s.Write(wire) + nsec3 = s.Sum(nsec3[:0]) } return toBase32(nsec3) } diff --git a/vendor/github.com/miekg/dns/scan_rr.go b/vendor/github.com/miekg/dns/scan_rr.go index 675fc80d81..8d6a1bf24a 100644 --- a/vendor/github.com/miekg/dns/scan_rr.go +++ b/vendor/github.com/miekg/dns/scan_rr.go @@ -64,74 +64,63 @@ func endingToString(c chan lex, errstr, f string) (string, *ParseError, string) return s, nil, l.comment } -// A remainder of the rdata with embedded spaces, return the parsed string slice (sans the spaces) -// or an error +// A remainder of the rdata with embedded spaces, split on unquoted whitespace +// and return the parsed string slice or an error func endingToTxtSlice(c chan lex, errstr, f string) ([]string, *ParseError, string) { // Get the remaining data until we see a zNewline - quote := false l := <-c - var s []string if l.err { - return s, &ParseError{f, errstr, l}, "" + return nil, &ParseError{f, errstr, l}, "" } - switch l.value == zQuote { - case true: // A number of quoted string - s = make([]string, 0) - empty := true - for l.value != zNewline && l.value != zEOF { - if l.err { - return nil, &ParseError{f, errstr, l}, "" - } - switch l.value { - case zString: - empty = false - if len(l.token) > 255 { - // split up tokens that are larger than 255 into 255-chunks - sx := []string{} - p, i := 0, 255 - for { - if i <= len(l.token) { - sx = append(sx, l.token[p:i]) - } else { - sx = append(sx, l.token[p:]) - break - } - p, i = p+255, i+255 - } - s = append(s, sx...) - break - } - - s = append(s, l.token) - case zBlank: - if quote { - // zBlank can only be seen in between txt parts. - return nil, &ParseError{f, errstr, l}, "" - } - case zQuote: - if empty && quote { - s = append(s, "") - } - quote = !quote - empty = true - default: - return nil, &ParseError{f, errstr, l}, "" - } - l = <-c - } - if quote { + // Build the slice + s := make([]string, 0) + quote := false + empty := false + for l.value != zNewline && l.value != zEOF { + if l.err { return nil, &ParseError{f, errstr, l}, "" } - case false: // Unquoted text record - s = make([]string, 1) - for l.value != zNewline && l.value != zEOF { - if l.err { - return s, &ParseError{f, errstr, l}, "" + switch l.value { + case zString: + empty = false + if len(l.token) > 255 { + // split up tokens that are larger than 255 into 255-chunks + sx := []string{} + p, i := 0, 255 + for { + if i <= len(l.token) { + sx = append(sx, l.token[p:i]) + } else { + sx = append(sx, l.token[p:]) + break + + } + p, i = p+255, i+255 + } + s = append(s, sx...) + break } - s[0] += l.token - l = <-c + + s = append(s, l.token) + case zBlank: + if quote { + // zBlank can only be seen in between txt parts. 
+ return nil, &ParseError{f, errstr, l}, "" + } + case zQuote: + if empty && quote { + s = append(s, "") + } + quote = !quote + empty = true + default: + return nil, &ParseError{f, errstr, l}, "" } + l = <-c + } + if quote { + return nil, &ParseError{f, errstr, l}, "" } return s, nil, l.comment } @@ -2027,9 +2016,12 @@ func setUINFO(h RR_Header, c chan lex, o, f string) (RR, *ParseError, string) { rr.Hdr = h s, e, c1 := endingToTxtSlice(c, "bad UINFO Uinfo", f) if e != nil { - return nil, e, "" + return nil, e, c1 } - rr.Uinfo = s[0] // silently discard anything above + if ln := len(s); ln == 0 { + return rr, nil, c1 + } + rr.Uinfo = s[0] // silently discard anything after the first character-string return rr, nil, c1 } diff --git a/vendor/github.com/miekg/dns/server.go b/vendor/github.com/miekg/dns/server.go index c34801aef8..0ca6e008c0 100644 --- a/vendor/github.com/miekg/dns/server.go +++ b/vendor/github.com/miekg/dns/server.go @@ -339,7 +339,7 @@ func (srv *Server) ListenAndServe() error { network := "tcp" if srv.Net == "tcp4-tls" { network = "tcp4" - } else if srv.Net == "tcp6" { + } else if srv.Net == "tcp6-tls" { network = "tcp6" } @@ -389,7 +389,9 @@ func (srv *Server) ActivateAndServe() error { if srv.UDPSize == 0 { srv.UDPSize = MinMsgSize } - if t, ok := pConn.(*net.UDPConn); ok { + // Check PacketConn interface's type is valid and value + // is not nil + if t, ok := pConn.(*net.UDPConn); ok && t != nil { if e := setUDPSocketOptions(t); e != nil { return e } diff --git a/vendor/github.com/miekg/dns/sig0.go b/vendor/github.com/miekg/dns/sig0.go index 2dce06af82..f31e9e6843 100644 --- a/vendor/github.com/miekg/dns/sig0.go +++ b/vendor/github.com/miekg/dns/sig0.go @@ -60,16 +60,15 @@ func (rr *SIG) Sign(k crypto.Signer, m *Msg) ([]byte, error) { } rr.Signature = toBase64(signature) - sig := string(signature) - buf = append(buf, sig...) + buf = append(buf, signature...) if len(buf) > int(^uint16(0)) { return nil, ErrBuf } // Adjust sig data length rdoff := len(mbuf) + 1 + 2 + 2 + 4 rdlen := binary.BigEndian.Uint16(buf[rdoff:]) - rdlen += uint16(len(sig)) + rdlen += uint16(len(signature)) binary.BigEndian.PutUint16(buf[rdoff:], rdlen) // Adjust additional count adc := binary.BigEndian.Uint16(buf[10:]) diff --git a/vendor/github.com/miekg/dns/tsig.go b/vendor/github.com/miekg/dns/tsig.go index 78365e1c5b..24013096b0 100644 --- a/vendor/github.com/miekg/dns/tsig.go +++ b/vendor/github.com/miekg/dns/tsig.go @@ -9,7 +9,6 @@ import ( "encoding/binary" "encoding/hex" "hash" - "io" "strconv" "strings" "time" @@ -124,7 +123,7 @@ func TsigGenerate(m *Msg, secret, requestMAC string, timersOnly bool) ([]byte, s default: return nil, "", ErrKeyAlg } - io.WriteString(h, string(buf)) + h.Write(buf) t.MAC = hex.EncodeToString(h.Sum(nil)) t.MACSize = uint16(len(t.MAC) / 2) // Size is half! 
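The io.WriteString conversions in this vendor update (dane.go, dnssec.go, nsecx.go, and tsig.go above) all remove the same inefficiency: every hash input was round-tripped through a string before being written. A minimal, self-contained sketch of the equivalence — the input data here is made up for illustration:

```go
package main

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"io"
)

func main() {
	data := []byte("example rdata")

	// Old pattern: string(data) copies the slice, and io.WriteString copies
	// it back to a []byte before calling Write -- two copies per call.
	h1 := sha256.New()
	io.WriteString(h1, string(data))

	// New pattern: hand the bytes to the hash directly, no copies.
	h2 := sha256.New()
	h2.Write(data)

	// Both forms produce the same digest.
	fmt.Println(bytes.Equal(h1.Sum(nil), h2.Sum(nil))) // true
}
```

The digests are identical; the h.Write form simply skips the allocations, which matters on hot paths such as NSEC3 hashing iterations.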
diff --git a/vendor/github.com/miekg/dns/types.go b/vendor/github.com/miekg/dns/types.go index f63a18b332..c8b3191e5a 100644 --- a/vendor/github.com/miekg/dns/types.go +++ b/vendor/github.com/miekg/dns/types.go @@ -480,12 +480,6 @@ func appendDomainNameByte(s []byte, b byte) []byte { func appendTXTStringByte(s []byte, b byte) []byte { switch b { - case '\t': - return append(s, '\\', 't') - case '\r': - return append(s, '\\', 'r') - case '\n': - return append(s, '\\', 'n') case '"', '\\': return append(s, '\\', b) } @@ -525,17 +519,8 @@ func nextByte(b []byte, offset int) (byte, int) { return dddToByte(b[offset+1:]), 4 } } - // not \ddd, maybe a control char - switch b[offset+1] { - case 't': - return '\t', 2 - case 'r': - return '\r', 2 - case 'n': - return '\n', 2 - default: - return b[offset+1], 2 - } + // not \ddd, just an RFC 1035 "quoted" character + return b[offset+1], 2 } type SPF struct { diff --git a/vendor/github.com/miekg/dns/udp.go b/vendor/github.com/miekg/dns/udp.go index c79c6c8837..af111b9a89 100644 --- a/vendor/github.com/miekg/dns/udp.go +++ b/vendor/github.com/miekg/dns/udp.go @@ -1,10 +1,9 @@ -// +build !windows,!plan9 +// +build !windows package dns import ( "net" - "syscall" ) // SessionUDP holds the remote address and the associated @@ -17,29 +16,6 @@ type SessionUDP struct { // RemoteAddr returns the remote network address. func (s *SessionUDP) RemoteAddr() net.Addr { return s.raddr } -// setUDPSocketOptions sets the UDP socket options. -// This function is implemented on a per platform basis. See udp_*.go for more details -func setUDPSocketOptions(conn *net.UDPConn) error { - sa, err := getUDPSocketName(conn) - if err != nil { - return err - } - switch sa.(type) { - case *syscall.SockaddrInet6: - v6only, err := getUDPSocketOptions6Only(conn) - if err != nil { - return err - } - setUDPSocketOptions6(conn) - if !v6only { - setUDPSocketOptions4(conn) - } - case *syscall.SockaddrInet4: - setUDPSocketOptions4(conn) - } - return nil -} - // ReadFromSessionUDP acts just like net.UDPConn.ReadFrom(), but returns a session object instead of a // net.UDPAddr. func ReadFromSessionUDP(conn *net.UDPConn, b []byte) (int, *SessionUDP, error) { diff --git a/vendor/github.com/miekg/dns/udp_linux.go b/vendor/github.com/miekg/dns/udp_linux.go index c62d21881b..033df42399 100644 --- a/vendor/github.com/miekg/dns/udp_linux.go +++ b/vendor/github.com/miekg/dns/udp_linux.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux,!appengine package dns @@ -15,6 +15,29 @@ import ( "syscall" ) +// setUDPSocketOptions sets the UDP socket options. +// This function is implemented on a per platform basis. See udp_*.go for more details +func setUDPSocketOptions(conn *net.UDPConn) error { + sa, err := getUDPSocketName(conn) + if err != nil { + return err + } + switch sa.(type) { + case *syscall.SockaddrInet6: + v6only, err := getUDPSocketOptions6Only(conn) + if err != nil { + return err + } + setUDPSocketOptions6(conn) + if !v6only { + setUDPSocketOptions4(conn) + } + case *syscall.SockaddrInet4: + setUDPSocketOptions4(conn) + } + return nil +} + // setUDPSocketOptions4 prepares the v4 socket for sessions. func setUDPSocketOptions4(conn *net.UDPConn) error { file, err := conn.File() @@ -22,14 +45,17 @@ func setUDPSocketOptions4(conn *net.UDPConn) error { return err } if err := syscall.SetsockoptInt(int(file.Fd()), syscall.IPPROTO_IP, syscall.IP_PKTINFO, 1); err != nil { + file.Close() return err } // Calling File() above results in the connection becoming blocking, we must fix that. 
// See https://github.com/miekg/dns/issues/279 err = syscall.SetNonblock(int(file.Fd()), true) if err != nil { + file.Close() return err } + file.Close() return nil } @@ -40,12 +66,15 @@ func setUDPSocketOptions6(conn *net.UDPConn) error { return err } if err := syscall.SetsockoptInt(int(file.Fd()), syscall.IPPROTO_IPV6, syscall.IPV6_RECVPKTINFO, 1); err != nil { + file.Close() return err } err = syscall.SetNonblock(int(file.Fd()), true) if err != nil { + file.Close() return err } + file.Close() return nil } @@ -59,8 +88,10 @@ func getUDPSocketOptions6Only(conn *net.UDPConn) (bool, error) { // dual stack. See http://stackoverflow.com/questions/1618240/how-to-support-both-ipv4-and-ipv6-connections v6only, err := syscall.GetsockoptInt(int(file.Fd()), syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY) if err != nil { + file.Close() return false, err } + file.Close() return v6only == 1, nil } @@ -69,5 +100,6 @@ func getUDPSocketName(conn *net.UDPConn) (syscall.Sockaddr, error) { if err != nil { return nil, err } + defer file.Close() return syscall.Getsockname(int(file.Fd())) } diff --git a/vendor/github.com/miekg/dns/udp_other.go b/vendor/github.com/miekg/dns/udp_other.go deleted file mode 100644 index d40732441b..0000000000 --- a/vendor/github.com/miekg/dns/udp_other.go +++ /dev/null @@ -1,17 +0,0 @@ -// +build !linux,!plan9 - -package dns - -import ( - "net" - "syscall" -) - -// These do nothing. See udp_linux.go for an example of how to implement this. - -// We tried to adhire to some kind of naming scheme. - -func setUDPSocketOptions4(conn *net.UDPConn) error { return nil } -func setUDPSocketOptions6(conn *net.UDPConn) error { return nil } -func getUDPSocketOptions6Only(conn *net.UDPConn) (bool, error) { return false, nil } -func getUDPSocketName(conn *net.UDPConn) (syscall.Sockaddr, error) { return nil, nil } diff --git a/vendor/github.com/miekg/dns/udp_plan9.go b/vendor/github.com/miekg/dns/udp_plan9.go deleted file mode 100644 index b794deeba0..0000000000 --- a/vendor/github.com/miekg/dns/udp_plan9.go +++ /dev/null @@ -1,34 +0,0 @@ -package dns - -import ( - "net" -) - -func setUDPSocketOptions(conn *net.UDPConn) error { return nil } - -// SessionUDP holds the remote address and the associated -// out-of-band data. -type SessionUDP struct { - raddr *net.UDPAddr - context []byte -} - -// RemoteAddr returns the remote network address. -func (s *SessionUDP) RemoteAddr() net.Addr { return s.raddr } - -// ReadFromSessionUDP acts just like net.UDPConn.ReadFrom(), but returns a session object instead of a -// net.UDPAddr. -func ReadFromSessionUDP(conn *net.UDPConn, b []byte) (int, *SessionUDP, error) { - oob := make([]byte, 40) - n, oobn, _, raddr, err := conn.ReadMsgUDP(b, oob) - if err != nil { - return n, nil, err - } - return n, &SessionUDP{raddr, oob[:oobn]}, err -} - -// WriteToSessionUDP acts just like net.UDPConn.WritetTo(), but uses a *SessionUDP instead of a net.Addr. 
-func WriteToSessionUDP(conn *net.UDPConn, b []byte, session *SessionUDP) (int, error) { - n, _, err := conn.WriteMsgUDP(b, session.context, session.raddr) - return n, err -} diff --git a/vendor/github.com/miekg/dns/udp_windows.go b/vendor/github.com/miekg/dns/udp_windows.go index 2ce4b33002..51e532ac2a 100644 --- a/vendor/github.com/miekg/dns/udp_windows.go +++ b/vendor/github.com/miekg/dns/udp_windows.go @@ -8,6 +8,8 @@ type SessionUDP struct { raddr *net.UDPAddr } +func (s *SessionUDP) RemoteAddr() net.Addr { return s.raddr } + // ReadFromSessionUDP acts just like net.UDPConn.ReadFrom(), but returns a session object instead of a // net.UDPAddr. func ReadFromSessionUDP(conn *net.UDPConn, b []byte) (int, *SessionUDP, error) { @@ -25,10 +27,3 @@ func WriteToSessionUDP(conn *net.UDPConn, b []byte, session *SessionUDP) (int, e return n, err } -func (s *SessionUDP) RemoteAddr() net.Addr { return s.raddr } - -// setUDPSocketOptions sets the UDP socket options. -// This function is implemented on a per platform basis. See udp_*.go for more details -func setUDPSocketOptions(conn *net.UDPConn) error { - return nil -} diff --git a/vendor/github.com/miekg/dns/zmsg.go b/vendor/github.com/miekg/dns/zmsg.go index c561370e71..94627e35e3 100644 --- a/vendor/github.com/miekg/dns/zmsg.go +++ b/vendor/github.com/miekg/dns/zmsg.go @@ -221,7 +221,7 @@ func (rr *DNAME) pack(msg []byte, off int, compression map[string]int, compress return off, err } headerEnd := off - off, err = PackDomainName(rr.Target, msg, off, compression, compress) + off, err = PackDomainName(rr.Target, msg, off, compression, false) if err != nil { return off, err } @@ -447,7 +447,7 @@ func (rr *KX) pack(msg []byte, off int, compression map[string]int, compress boo if err != nil { return off, err } - off, err = PackDomainName(rr.Exchanger, msg, off, compression, compress) + off, err = PackDomainName(rr.Exchanger, msg, off, compression, false) if err != nil { return off, err } @@ -539,7 +539,7 @@ func (rr *LP) pack(msg []byte, off int, compression map[string]int, compress boo if err != nil { return off, err } - off, err = PackDomainName(rr.Fqdn, msg, off, compression, compress) + off, err = PackDomainName(rr.Fqdn, msg, off, compression, false) if err != nil { return off, err } @@ -679,7 +679,7 @@ func (rr *NAPTR) pack(msg []byte, off int, compression map[string]int, compress if err != nil { return off, err } - off, err = PackDomainName(rr.Replacement, msg, off, compression, compress) + off, err = PackDomainName(rr.Replacement, msg, off, compression, false) if err != nil { return off, err } @@ -753,7 +753,7 @@ func (rr *NSAPPTR) pack(msg []byte, off int, compression map[string]int, compres return off, err } headerEnd := off - off, err = PackDomainName(rr.Ptr, msg, off, compression, compress) + off, err = PackDomainName(rr.Ptr, msg, off, compression, false) if err != nil { return off, err } @@ -767,7 +767,7 @@ func (rr *NSEC) pack(msg []byte, off int, compression map[string]int, compress b return off, err } headerEnd := off - off, err = PackDomainName(rr.NextDomain, msg, off, compression, compress) + off, err = PackDomainName(rr.NextDomain, msg, off, compression, false) if err != nil { return off, err } @@ -905,11 +905,11 @@ func (rr *PX) pack(msg []byte, off int, compression map[string]int, compress boo if err != nil { return off, err } - off, err = PackDomainName(rr.Map822, msg, off, compression, compress) + off, err = PackDomainName(rr.Map822, msg, off, compression, false) if err != nil { return off, err } - off, err = 
PackDomainName(rr.Mapx400, msg, off, compression, compress) + off, err = PackDomainName(rr.Mapx400, msg, off, compression, false) if err != nil { return off, err } @@ -963,11 +963,11 @@ func (rr *RP) pack(msg []byte, off int, compression map[string]int, compress boo return off, err } headerEnd := off - off, err = PackDomainName(rr.Mbox, msg, off, compression, compress) + off, err = PackDomainName(rr.Mbox, msg, off, compression, false) if err != nil { return off, err } - off, err = PackDomainName(rr.Txt, msg, off, compression, compress) + off, err = PackDomainName(rr.Txt, msg, off, compression, false) if err != nil { return off, err } @@ -1009,7 +1009,7 @@ func (rr *RRSIG) pack(msg []byte, off int, compression map[string]int, compress if err != nil { return off, err } - off, err = PackDomainName(rr.SignerName, msg, off, compression, compress) + off, err = PackDomainName(rr.SignerName, msg, off, compression, false) if err != nil { return off, err } @@ -1073,7 +1073,7 @@ func (rr *SIG) pack(msg []byte, off int, compression map[string]int, compress bo if err != nil { return off, err } - off, err = PackDomainName(rr.SignerName, msg, off, compression, compress) + off, err = PackDomainName(rr.SignerName, msg, off, compression, false) if err != nil { return off, err } @@ -1181,7 +1181,7 @@ func (rr *SRV) pack(msg []byte, off int, compression map[string]int, compress bo if err != nil { return off, err } - off, err = PackDomainName(rr.Target, msg, off, compression, compress) + off, err = PackDomainName(rr.Target, msg, off, compression, false) if err != nil { return off, err } @@ -1243,11 +1243,11 @@ func (rr *TALINK) pack(msg []byte, off int, compression map[string]int, compress return off, err } headerEnd := off - off, err = PackDomainName(rr.PreviousName, msg, off, compression, compress) + off, err = PackDomainName(rr.PreviousName, msg, off, compression, false) if err != nil { return off, err } - off, err = PackDomainName(rr.NextName, msg, off, compression, compress) + off, err = PackDomainName(rr.NextName, msg, off, compression, false) if err != nil { return off, err } @@ -1261,7 +1261,7 @@ func (rr *TKEY) pack(msg []byte, off int, compression map[string]int, compress b return off, err } headerEnd := off - off, err = PackDomainName(rr.Algorithm, msg, off, compression, compress) + off, err = PackDomainName(rr.Algorithm, msg, off, compression, false) if err != nil { return off, err } @@ -1333,7 +1333,7 @@ func (rr *TSIG) pack(msg []byte, off int, compression map[string]int, compress b return off, err } headerEnd := off - off, err = PackDomainName(rr.Algorithm, msg, off, compression, compress) + off, err = PackDomainName(rr.Algorithm, msg, off, compression, false) if err != nil { return off, err } diff --git a/vendor/vendor.json b/vendor/vendor.json index 631073955e..2de3d820a0 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -508,10 +508,10 @@ "revisionTime": "2015-04-06T19:39:34+02:00" }, { - "checksumSHA1": "Wahi4g/9XiHhSLAJ+8jskg71PCU=", + "checksumSHA1": "wJxkqZ80Ux6+klKr1+3erSZjVnQ=", "path": "github.com/miekg/dns", - "revision": "58f52c57ce9df13460ac68200cef30a008b9c468", - "revisionTime": "2016-10-18T06:08:08Z" + "revision": "672033dedc09500ca4d340760d0b80b9c0b198bd", + "revisionTime": "2017-02-13T20:16:50Z" }, { "checksumSHA1": "3YJklSuzSE1Rt8A+2dhiWSmf/fw=", From 865f28bb153434dc12ebe695a57ff39a9e3cfe44 Mon Sep 17 00:00:00 2001 From: James Hartig Date: Mon, 13 Feb 2017 15:48:51 -0500 Subject: [PATCH 23/37] discovery: Instead of looping over conf.Search, use NameList() --- 
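NameList, added to the vendored miekg/dns earlier in this series, encodes the resolv.conf ndots/search rules that this patch now reuses. A rough sketch of the query order it produces — the ClientConfig values below are invented for illustration; in the discovery code they come from dns.ClientConfigFromFile("/etc/resolv.conf"):

```go
package main

import (
	"fmt"

	"github.com/miekg/dns"
)

func main() {
	// Hypothetical resolver configuration with one search domain.
	conf := &dns.ClientConfig{
		Search: []string{"example.com"},
		Ndots:  1,
	}

	// "web.service" has more labels than ndots, so the name itself is
	// tried first, then the search domain is appended.
	fmt.Println(conf.NameList("web.service"))
	// -> [web.service. web.service.example.com.]

	// "web" has too few labels, so search domains are tried first and the
	// bare name last.
	fmt.Println(conf.NameList("web"))
	// -> [web.example.com. web.]
}
```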
discovery/dns/dns.go | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index ca9ed56fc0..9890de4aee 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -179,13 +179,12 @@ func lookupAll(name string, qtype uint16) (*dns.Msg, error) { for _, server := range conf.Servers { servAddr := net.JoinHostPort(server, conf.Port) - for _, suffix := range conf.Search { - response, err = lookup(name, qtype, client, servAddr, suffix, false) + for _, lname := range conf.NameList(name) { + response, err = lookup(lname, qtype, client, servAddr, false) if err != nil { log. With("server", server). With("name", name). - With("suffix", suffix). With("reason", err). Warn("DNS resolution failed.") continue @@ -194,22 +193,12 @@ func lookupAll(name string, qtype uint16) (*dns.Msg, error) { return response, nil } } - response, err = lookup(name, qtype, client, servAddr, "", false) - if err == nil { - return response, nil - } - log. - With("server", server). - With("name", name). - With("reason", err). - Warn("DNS resolution failed.") } return response, fmt.Errorf("could not resolve %s: no server responded", name) } -func lookup(name string, queryType uint16, client *dns.Client, servAddr string, suffix string, edns bool) (*dns.Msg, error) { +func lookup(lname string, queryType uint16, client *dns.Client, servAddr string, edns bool) (*dns.Msg, error) { msg := &dns.Msg{} - lname := strings.Join([]string{name, suffix}, ".") msg.SetQuestion(dns.Fqdn(lname), queryType) if edns { @@ -224,7 +213,7 @@ func lookup(name string, queryType uint16, client *dns.Client, servAddr string, if edns { // Truncated even though EDNS is used client.Net = "tcp" } - return lookup(name, queryType, client, servAddr, suffix, !edns) + return lookup(lname, queryType, client, servAddr, !edns) } if err != nil { return nil, err From beb3c4b38950f588cb52bfde60fc7dff45318989 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 13 Feb 2017 23:11:57 +0100 Subject: [PATCH 24/37] Remove legacy remote storage implementations This removes legacy support for specific remote storage systems in favor of only offering the generic remote write protocol. An example bridge application that translates from the generic protocol to each of those legacy backends is still provided at: documentation/examples/remote_storage/remote_storage_bridge See also https://github.com/prometheus/prometheus/issues/10 The next step in the plan is to re-add support for multiple remote storages. 
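For reference, a condensed, hypothetical sketch of how the reworked remote.Storage is driven — the standalone main and the endpoint URL are invented for illustration; the real wiring lives in cmd/prometheus/main.go:

```go
package main

import (
	"time"

	"github.com/prometheus/common/log"
	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/storage/remote"
)

func main() {
	// Hypothetical minimal configuration; any remote_write endpoint works.
	conf, err := config.Load(`
remote_write:
  url: "http://localhost:1234/receive"
`)
	if err != nil {
		log.Fatal(err)
	}

	var rs remote.Storage
	// ApplyConfig (re)creates the single write queue; calling it again on a
	// config reload swaps the queue out in place.
	if err := rs.ApplyConfig(conf); err != nil {
		log.Fatal(err)
	}
	defer rs.Stop()

	// Queue one sample for remote delivery. Append never blocks the caller;
	// in this toy program the sample may still be in flight at exit.
	rs.Append(&model.Sample{
		Metric:    model.Metric{model.MetricNameLabel: "example_metric"},
		Value:     1,
		Timestamp: model.TimeFromUnixNano(time.Now().UnixNano()),
	})
}
```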
--- cmd/prometheus/config.go | 101 ++++++---------- cmd/prometheus/config_test.go | 13 --- cmd/prometheus/main.go | 23 +--- .../remote_storage_bridge}/graphite/client.go | 0 .../graphite/client_test.go | 0 .../remote_storage_bridge}/graphite/escape.go | 0 .../remote_storage_bridge}/influxdb/client.go | 0 .../influxdb/client_test.go | 0 .../remote_storage_bridge/main.go | 9 +- .../remote_storage_bridge}/opentsdb/client.go | 0 .../opentsdb/client_test.go | 0 .../opentsdb/tagvalue.go | 0 .../opentsdb/tagvalue_test.go | 0 storage/remote/remote.go | 104 ++++++----------- storage/remote/remote_reloadable.go | 108 ------------------ 15 files changed, 79 insertions(+), 279 deletions(-) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/graphite/client.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/graphite/client_test.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/graphite/escape.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/influxdb/client.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/influxdb/client_test.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/opentsdb/client.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/opentsdb/client_test.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/opentsdb/tagvalue.go (100%) rename {storage/remote => documentation/examples/remote_storage/remote_storage_bridge}/opentsdb/tagvalue_test.go (100%) delete mode 100644 storage/remote/remote_reloadable.go diff --git a/cmd/prometheus/config.go b/cmd/prometheus/config.go index 60b303adfd..4bc249880c 100644 --- a/cmd/prometheus/config.go +++ b/cmd/prometheus/config.go @@ -34,7 +34,6 @@ import ( "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/storage/local/chunk" "github.com/prometheus/prometheus/storage/local/index" - "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/web" ) @@ -52,15 +51,37 @@ var cfg = struct { notifierTimeout time.Duration queryEngine promql.EngineOptions web web.Options - remote remote.Options alertmanagerURLs stringset prometheusURL string - influxdbURL string }{ alertmanagerURLs: stringset{}, } +// Value type for flags that are now unused, but which are kept around to +// fulfill 1.0 stability guarantees. +type unusedFlag struct { + name string + value string + help string +} + +func (f *unusedFlag) Set(v string) error { + f.value = v + log.Warnf("Flag %q is unused, but set to %q! See the flag's help message: %s", f.name, f.value, f.help) + return nil +} + +func (f unusedFlag) String() string { + return f.value +} + +func registerUnusedFlags(fs *flag.FlagSet, help string, flags []string) { + for _, name := range flags { + fs.Var(&unusedFlag{name: name, help: help}, name, help) + } +} + func init() { cfg.fs = flag.NewFlagSet(os.Args[0], flag.ContinueOnError) cfg.fs.Usage = usage @@ -190,44 +211,19 @@ func init() { "Local storage engine. Supported values are: 'persisted' (full local storage with on-disk persistence) and 'none' (no local storage).", ) - // Remote storage. - cfg.fs.StringVar( - &cfg.remote.GraphiteAddress, "storage.remote.graphite-address", "", - "The host:port of the remote Graphite server to send samples to. 
None, if empty.", - ) - cfg.fs.StringVar( - &cfg.remote.GraphiteTransport, "storage.remote.graphite-transport", "tcp", - "Transport protocol to use to communicate with Graphite. 'tcp', if empty.", - ) - cfg.fs.StringVar( - &cfg.remote.GraphitePrefix, "storage.remote.graphite-prefix", "", - "The prefix to prepend to all metrics exported to Graphite. None, if empty.", - ) - cfg.fs.StringVar( - &cfg.remote.OpentsdbURL, "storage.remote.opentsdb-url", "", - "The URL of the remote OpenTSDB server to send samples to. None, if empty.", - ) - cfg.fs.StringVar( - &cfg.influxdbURL, "storage.remote.influxdb-url", "", - "The URL of the remote InfluxDB server to send samples to. None, if empty.", - ) - cfg.fs.StringVar( - &cfg.remote.InfluxdbRetentionPolicy, "storage.remote.influxdb.retention-policy", "default", - "The InfluxDB retention policy to use.", - ) - cfg.fs.StringVar( - &cfg.remote.InfluxdbUsername, "storage.remote.influxdb.username", "", - "The username to use when sending samples to InfluxDB. The corresponding password must be provided via the INFLUXDB_PW environment variable.", - ) - cfg.fs.StringVar( - &cfg.remote.InfluxdbDatabase, "storage.remote.influxdb.database", "prometheus", - "The name of the database to use for storing samples in InfluxDB.", - ) - - cfg.fs.DurationVar( - &cfg.remote.StorageTimeout, "storage.remote.timeout", 30*time.Second, - "The timeout to use when sending samples to the remote storage.", - ) + // Unused flags for removed remote storage code. + const remoteStorageFlagsHelp = "WARNING: THIS FLAG IS UNUSED! Built-in support for InfluxDB, Graphite, and OpenTSDB has been removed. Use Prometheus's generic remote write feature for building remote storage integrations. See https://prometheus.io/docs/operating/configuration/#" + registerUnusedFlags(cfg.fs, remoteStorageFlagsHelp, []string{ + "storage.remote.graphite-address", + "storage.remote.graphite-transport", + "storage.remote.graphite-prefix", + "storage.remote.opentsdb-url", + "storage.remote.influxdb-url", + "storage.remote.influxdb.retention-policy", + "storage.remote.influxdb.username", + "storage.remote.influxdb.database", + "storage.remote.timeout", + }) // Alertmanager. cfg.fs.Var( @@ -287,17 +283,12 @@ func parse(args []string) error { // RoutePrefix must always be at least '/'. 
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") - if err := parseInfluxdbURL(); err != nil { - return err - } for u := range cfg.alertmanagerURLs { if err := validateAlertmanagerURL(u); err != nil { return err } } - cfg.remote.InfluxdbPassword = os.Getenv("INFLUXDB_PW") - return nil } @@ -332,24 +323,6 @@ func parsePrometheusURL() error { return nil } -func parseInfluxdbURL() error { - if cfg.influxdbURL == "" { - return nil - } - - if ok := govalidator.IsURL(cfg.influxdbURL); !ok { - return fmt.Errorf("invalid InfluxDB URL: %s", cfg.influxdbURL) - } - - url, err := url.Parse(cfg.influxdbURL) - if err != nil { - return err - } - - cfg.remote.InfluxdbURL = url - return nil -} - func validateAlertmanagerURL(u string) error { if u == "" { return nil diff --git a/cmd/prometheus/config_test.go b/cmd/prometheus/config_test.go index a4e396407a..1bc1aceddc 100644 --- a/cmd/prometheus/config_test.go +++ b/cmd/prometheus/config_test.go @@ -36,18 +36,6 @@ func TestParse(t *testing.T) { input: []string{"-web.external-url", "'https://url/prometheus'"}, valid: false, }, - { - input: []string{"-storage.remote.influxdb-url", ""}, - valid: true, - }, - { - input: []string{"-storage.remote.influxdb-url", "http://localhost:8086/"}, - valid: true, - }, - { - input: []string{"-storage.remote.influxdb-url", "'https://some-url/'"}, - valid: false, - }, { input: []string{"-alertmanager.url", ""}, valid: true, @@ -69,7 +57,6 @@ func TestParse(t *testing.T) { for i, test := range tests { // reset "immutable" config cfg.prometheusURL = "" - cfg.influxdbURL = "" cfg.alertmanagerURLs = stringset{} err := parse(test.input) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index de1c348a02..c59295861e 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -92,19 +92,9 @@ func Main() int { return 1 } - remoteStorage, err := remote.New(&cfg.remote) - if err != nil { - log.Errorf("Error initializing remote storage: %s", err) - return 1 - } - if remoteStorage != nil { - sampleAppender = append(sampleAppender, remoteStorage) - reloadables = append(reloadables, remoteStorage) - } - - reloadableRemoteStorage := remote.NewConfigurable() - sampleAppender = append(sampleAppender, reloadableRemoteStorage) - reloadables = append(reloadables, reloadableRemoteStorage) + remoteStorage := &remote.Storage{} + sampleAppender = append(sampleAppender, remoteStorage) + reloadables = append(reloadables, remoteStorage) var ( notifier = notifier.New(&cfg.notifier) @@ -188,12 +178,7 @@ func Main() int { } }() - if remoteStorage != nil { - remoteStorage.Start() - defer remoteStorage.Stop() - } - - defer reloadableRemoteStorage.Stop() + defer remoteStorage.Stop() // The storage has to be fully initialized before registering. 
if instrumentedStorage, ok := localStorage.(prometheus.Collector); ok { diff --git a/storage/remote/graphite/client.go b/documentation/examples/remote_storage/remote_storage_bridge/graphite/client.go similarity index 100% rename from storage/remote/graphite/client.go rename to documentation/examples/remote_storage/remote_storage_bridge/graphite/client.go diff --git a/storage/remote/graphite/client_test.go b/documentation/examples/remote_storage/remote_storage_bridge/graphite/client_test.go similarity index 100% rename from storage/remote/graphite/client_test.go rename to documentation/examples/remote_storage/remote_storage_bridge/graphite/client_test.go diff --git a/storage/remote/graphite/escape.go b/documentation/examples/remote_storage/remote_storage_bridge/graphite/escape.go similarity index 100% rename from storage/remote/graphite/escape.go rename to documentation/examples/remote_storage/remote_storage_bridge/graphite/escape.go diff --git a/storage/remote/influxdb/client.go b/documentation/examples/remote_storage/remote_storage_bridge/influxdb/client.go similarity index 100% rename from storage/remote/influxdb/client.go rename to documentation/examples/remote_storage/remote_storage_bridge/influxdb/client.go diff --git a/storage/remote/influxdb/client_test.go b/documentation/examples/remote_storage/remote_storage_bridge/influxdb/client_test.go similarity index 100% rename from storage/remote/influxdb/client_test.go rename to documentation/examples/remote_storage/remote_storage_bridge/influxdb/client_test.go diff --git a/documentation/examples/remote_storage/remote_storage_bridge/main.go b/documentation/examples/remote_storage/remote_storage_bridge/main.go index b5b3f12ab9..d6e7fcb457 100644 --- a/documentation/examples/remote_storage/remote_storage_bridge/main.go +++ b/documentation/examples/remote_storage/remote_storage_bridge/main.go @@ -29,12 +29,13 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/storage/remote" - "github.com/prometheus/prometheus/storage/remote/graphite" - "github.com/prometheus/prometheus/storage/remote/influxdb" - "github.com/prometheus/prometheus/storage/remote/opentsdb" influx "github.com/influxdb/influxdb/client" + + "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_bridge/graphite" + "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_bridge/influxdb" + "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_bridge/opentsdb" + "github.com/prometheus/prometheus/storage/remote" ) type config struct { diff --git a/storage/remote/opentsdb/client.go b/documentation/examples/remote_storage/remote_storage_bridge/opentsdb/client.go similarity index 100% rename from storage/remote/opentsdb/client.go rename to documentation/examples/remote_storage/remote_storage_bridge/opentsdb/client.go diff --git a/storage/remote/opentsdb/client_test.go b/documentation/examples/remote_storage/remote_storage_bridge/opentsdb/client_test.go similarity index 100% rename from storage/remote/opentsdb/client_test.go rename to documentation/examples/remote_storage/remote_storage_bridge/opentsdb/client_test.go diff --git a/storage/remote/opentsdb/tagvalue.go b/documentation/examples/remote_storage/remote_storage_bridge/opentsdb/tagvalue.go similarity index 100% rename from storage/remote/opentsdb/tagvalue.go rename to 
documentation/examples/remote_storage/remote_storage_bridge/opentsdb/tagvalue.go diff --git a/storage/remote/opentsdb/tagvalue_test.go b/documentation/examples/remote_storage/remote_storage_bridge/opentsdb/tagvalue_test.go similarity index 100% rename from storage/remote/opentsdb/tagvalue_test.go rename to documentation/examples/remote_storage/remote_storage_bridge/opentsdb/tagvalue_test.go diff --git a/storage/remote/remote.go b/storage/remote/remote.go index 4aedb794ff..1fcb0cded2 100644 --- a/storage/remote/remote.go +++ b/storage/remote/remote.go @@ -1,4 +1,4 @@ -// Copyright 2015 The Prometheus Authors +// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -14,101 +14,67 @@ package remote import ( - "net/url" "sync" - "time" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - influx "github.com/influxdb/influxdb/client" - "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/relabel" - "github.com/prometheus/prometheus/storage/remote/graphite" - "github.com/prometheus/prometheus/storage/remote/influxdb" - "github.com/prometheus/prometheus/storage/remote/opentsdb" ) -// Storage collects multiple remote storage queues. +// Storage allows queueing samples for remote writes. type Storage struct { - queues []*StorageQueueManager - externalLabels model.LabelSet - relabelConfigs []*config.RelabelConfig mtx sync.RWMutex + externalLabels model.LabelSet + conf config.RemoteWriteConfig + + queue *StorageQueueManager } -// ApplyConfig updates the status state as the new config requires. +// ApplyConfig updates the state as the new config requires. func (s *Storage) ApplyConfig(conf *config.Config) error { s.mtx.Lock() defer s.mtx.Unlock() - s.externalLabels = conf.GlobalConfig.ExternalLabels - s.relabelConfigs = conf.RemoteWriteConfig.WriteRelabelConfigs - return nil -} + // TODO: we should only stop & recreate queues which have changes, + // as this can be quite disruptive. + var newQueue *StorageQueueManager -// New returns a new remote Storage. -func New(o *Options) (*Storage, error) { - s := &Storage{} - if o.GraphiteAddress != "" { - c := graphite.NewClient( - o.GraphiteAddress, o.GraphiteTransport, - o.StorageTimeout, o.GraphitePrefix) - s.queues = append(s.queues, NewStorageQueueManager(c, nil)) - } - if o.OpentsdbURL != "" { - c := opentsdb.NewClient(o.OpentsdbURL, o.StorageTimeout) - s.queues = append(s.queues, NewStorageQueueManager(c, nil)) - } - if o.InfluxdbURL != nil { - conf := influx.Config{ - URL: *o.InfluxdbURL, - Username: o.InfluxdbUsername, - Password: o.InfluxdbPassword, - Timeout: o.StorageTimeout, + if conf.RemoteWriteConfig.URL != nil { + c, err := NewClient(conf.RemoteWriteConfig) + if err != nil { + return err } - c := influxdb.NewClient(conf, o.InfluxdbDatabase, o.InfluxdbRetentionPolicy) - prometheus.MustRegister(c) - s.queues = append(s.queues, NewStorageQueueManager(c, nil)) + newQueue = NewStorageQueueManager(c, nil) } - if len(s.queues) == 0 { - return nil, nil - } - return s, nil -} -// Options contains configuration parameters for a remote storage. 
-type Options struct { - StorageTimeout time.Duration - InfluxdbURL *url.URL - InfluxdbRetentionPolicy string - InfluxdbUsername string - InfluxdbPassword string - InfluxdbDatabase string - OpentsdbURL string - GraphiteAddress string - GraphiteTransport string - GraphitePrefix string -} - -// Start starts the background processing of the storage queues. -func (s *Storage) Start() { - for _, q := range s.queues { - q.Start() + if s.queue != nil { + s.queue.Stop() } + s.queue = newQueue + s.conf = conf.RemoteWriteConfig + s.externalLabels = conf.GlobalConfig.ExternalLabels + if s.queue != nil { + s.queue.Start() + } + return nil } // Stop the background processing of the storage queues. func (s *Storage) Stop() { - for _, q := range s.queues { - q.Stop() + if s.queue != nil { + s.queue.Stop() } } // Append implements storage.SampleAppender. Always returns nil. func (s *Storage) Append(smpl *model.Sample) error { s.mtx.RLock() + defer s.mtx.RUnlock() + + if s.queue == nil { + return nil + } var snew model.Sample snew = *smpl @@ -120,16 +86,12 @@ func (s *Storage) Append(smpl *model.Sample) error { } } snew.Metric = model.Metric( - relabel.Process(model.LabelSet(snew.Metric), s.relabelConfigs...)) - s.mtx.RUnlock() + relabel.Process(model.LabelSet(snew.Metric), s.conf.WriteRelabelConfigs...)) if snew.Metric == nil { return nil } - - for _, q := range s.queues { - q.Append(&snew) - } + s.queue.Append(&snew) return nil } diff --git a/storage/remote/remote_reloadable.go b/storage/remote/remote_reloadable.go deleted file mode 100644 index abe9abe543..0000000000 --- a/storage/remote/remote_reloadable.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2016 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package remote - -import ( - "sync" - - "github.com/prometheus/common/model" - - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/relabel" -) - -// Storage collects multiple remote storage queues. -type ReloadableStorage struct { - mtx sync.RWMutex - externalLabels model.LabelSet - conf config.RemoteWriteConfig - - queue *StorageQueueManager -} - -// New returns a new remote Storage. -func NewConfigurable() *ReloadableStorage { - return &ReloadableStorage{} -} - -// ApplyConfig updates the state as the new config requires. -func (s *ReloadableStorage) ApplyConfig(conf *config.Config) error { - s.mtx.Lock() - defer s.mtx.Unlock() - - // TODO: we should only stop & recreate queues which have changes, - // as this can be quite disruptive. - var newQueue *StorageQueueManager - - if conf.RemoteWriteConfig.URL != nil { - c, err := NewClient(conf.RemoteWriteConfig) - if err != nil { - return err - } - newQueue = NewStorageQueueManager(c, nil) - } - - if s.queue != nil { - s.queue.Stop() - } - s.queue = newQueue - s.conf = conf.RemoteWriteConfig - s.externalLabels = conf.GlobalConfig.ExternalLabels - if s.queue != nil { - s.queue.Start() - } - return nil -} - -// Stop the background processing of the storage queues. 
-func (s *ReloadableStorage) Stop() {
-	if s.queue != nil {
-		s.queue.Stop()
-	}
-}
-
-// Append implements storage.SampleAppender. Always returns nil.
-func (s *ReloadableStorage) Append(smpl *model.Sample) error {
-	s.mtx.RLock()
-	defer s.mtx.RUnlock()
-
-	if s.queue == nil {
-		return nil
-	}
-
-	var snew model.Sample
-	snew = *smpl
-	snew.Metric = smpl.Metric.Clone()
-
-	for ln, lv := range s.externalLabels {
-		if _, ok := smpl.Metric[ln]; !ok {
-			snew.Metric[ln] = lv
-		}
-	}
-	snew.Metric = model.Metric(
-		relabel.Process(model.LabelSet(snew.Metric), s.conf.WriteRelabelConfigs...))
-
-	if snew.Metric == nil {
-		return nil
-	}
-	s.queue.Append(&snew)
-	return nil
-}
-
-// NeedsThrottling implements storage.SampleAppender. It will always return
-// false as a remote storage drops samples on the floor if backlogging instead
-// of asking for throttling.
-func (s *ReloadableStorage) NeedsThrottling() bool {
-	return false
-}

From b4ce5ca9254c305dbed0a164486640a19d66de1e Mon Sep 17 00:00:00 2001
From: beorn7
Date: Tue, 14 Feb 2017 18:32:07 +0100
Subject: [PATCH 25/37] vendoring: Do not ignore appengine build tags

For weird reasons, this excluded github.com/miekg/dns/udp_other.go
from vendoring.
---
 vendor/github.com/miekg/dns/udp_other.go | 15 +++++++++++++++
 vendor/vendor.json                       |  4 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 vendor/github.com/miekg/dns/udp_other.go

diff --git a/vendor/github.com/miekg/dns/udp_other.go b/vendor/github.com/miekg/dns/udp_other.go
new file mode 100644
index 0000000000..488a282b26
--- /dev/null
+++ b/vendor/github.com/miekg/dns/udp_other.go
@@ -0,0 +1,15 @@
+// +build !linux appengine
+
+package dns
+
+import (
+	"net"
+)
+
+// These do nothing. See udp_linux.go for an example of how to implement this.
+
+// We tried to adhere to some kind of naming scheme.
+func setUDPSocketOptions(conn *net.UDPConn) error { return nil }
+func setUDPSocketOptions4(conn *net.UDPConn) error { return nil }
+func setUDPSocketOptions6(conn *net.UDPConn) error { return nil }
+func getUDPSocketOptions6Only(conn *net.UDPConn) (bool, error) { return false, nil }
diff --git a/vendor/vendor.json b/vendor/vendor.json
index 2de3d820a0..bbab8e6aaf 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -1,6 +1,6 @@
 {
 	"comment": "",
-	"ignore": "test appengine",
+	"ignore": "test",
 	"package": [
 		{
 			"checksumSHA1": "Cslv4/ITyQmgjSUhNXFu8q5bqOU=",
@@ -508,7 +508,7 @@
 			"revisionTime": "2015-04-06T19:39:34+02:00"
 		},
 		{
-			"checksumSHA1": "wJxkqZ80Ux6+klKr1+3erSZjVnQ=",
+			"checksumSHA1": "UoQnBcZrj1gvLAK+MGNB+E7+AIE=",
 			"path": "github.com/miekg/dns",
 			"revision": "672033dedc09500ca4d340760d0b80b9c0b198bd",
 			"revisionTime": "2017-02-13T20:16:50Z"

From 0b1790ee4446449f276f9abac4e6436e0b3e87d6 Mon Sep 17 00:00:00 2001
From: Stephen Soltesz
Date: Tue, 14 Feb 2017 20:12:38 -0500
Subject: [PATCH 26/37] Match addresses with or without declared ports.

This change updates port relabeling for pod and service discovery so the
relabeling regex matches addresses with or without declared ports. In
addition, this change uses a consistent style in the replacement pattern
for the two expressions.

Previously, for both services and pods that did not have declared ports,
the relabel config regex would fail to match:

  __meta_kubernetes_service_annotation_prometheus_io_port
    regex: (.+)(?::\d+);(\d+)

  __meta_kubernetes_pod_annotation_prometheus_io_port
    regex: (.+):(?:\d+);(\d+)

Both regexes expected a <host>:<port> pattern.
The new regex matches addresses with or without declared ports by making
the :<port> pattern optional.

  __meta_kubernetes_service_annotation_prometheus_io_port
  __meta_kubernetes_pod_annotation_prometheus_io_port
    regex: (.+)(?::\d+)?;(\d+)
---
 documentation/examples/prometheus-kubernetes.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/documentation/examples/prometheus-kubernetes.yml b/documentation/examples/prometheus-kubernetes.yml
index 1f91d36a96..893614e639 100644
--- a/documentation/examples/prometheus-kubernetes.yml
+++ b/documentation/examples/prometheus-kubernetes.yml
@@ -108,7 +108,7 @@ scrape_configs:
   - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
     action: replace
     target_label: __address__
-    regex: (.+)(?::\d+);(\d+)
+    regex: (.+)(?::\d+)?;(\d+)
     replacement: $1:$2
   - action: labelmap
     regex: __meta_kubernetes_service_label_(.+)
@@ -174,8 +174,8 @@ scrape_configs:
     regex: (.+)
   - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
     action: replace
-    regex: (.+):(?:\d+);(\d+)
-    replacement: ${1}:${2}
+    regex: (.+)(?::\d+)?;(\d+)
+    replacement: $1:$2
     target_label: __address__
   - action: labelmap
     regex: __meta_kubernetes_pod_label_(.+)

From 3f29324e04524f70b291f9aca694dade3835e52e Mon Sep 17 00:00:00 2001
From: Stephen Soltesz
Date: Thu, 16 Feb 2017 14:46:04 -0500
Subject: [PATCH 27/37] Fix kubernetes host:port relabel regex.

This change corrects a bug introduced by PR
https://github.com/prometheus/prometheus/pull/2427

The regex uses three groups: the hostname, an optional port, and the
preferred port from a kubernetes annotation.

Previously, the second group should have been ignored if a :port was not
present in the input. However, making the port group optional with the "?"
had the unintended side-effect of allowing the hostname regex "(.+)" to
match greedily, which included the ":port" patterns up to the ";" separating
the hostname from the kubernetes port annotation.

This change updates the regex for the hostname to match any non-":"
characters. This forces the regex to stop if a ":port" is present and allows
the second group to match the optional port.
---
 documentation/examples/prometheus-kubernetes.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/documentation/examples/prometheus-kubernetes.yml b/documentation/examples/prometheus-kubernetes.yml
index 893614e639..8ac7899b2e 100644
--- a/documentation/examples/prometheus-kubernetes.yml
+++ b/documentation/examples/prometheus-kubernetes.yml
@@ -108,7 +108,7 @@ scrape_configs:
   - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
     action: replace
     target_label: __address__
-    regex: (.+)(?::\d+)?;(\d+)
+    regex: ([^:]+)(?::\d+)?;(\d+)
     replacement: $1:$2
   - action: labelmap
     regex: __meta_kubernetes_service_label_(.+)
@@ -174,7 +174,7 @@ scrape_configs:
     regex: (.+)
   - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
     action: replace
-    regex: (.+)(?::\d+)?;(\d+)
+    regex: ([^:]+)(?::\d+)?;(\d+)
     replacement: $1:$2
     target_label: __address__
   - action: labelmap

From f70b52d0b6d91bfb19fc45ac5ef7872d2ba78064 Mon Sep 17 00:00:00 2001
From: Jeremy Meulemans
Date: Thu, 16 Feb 2017 17:55:42 -0600
Subject: [PATCH 28/37] Adding gauge for number of open head chunks.
Fixes #1710 --- storage/local/storage.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/storage/local/storage.go b/storage/local/storage.go index 25dc30b0a7..81e1fd7306 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -180,6 +180,7 @@ type MemorySeriesStorage struct { persistErrors prometheus.Counter queuedChunksToPersist prometheus.Counter numSeries prometheus.Gauge + numHeadChunks prometheus.Gauge dirtySeries prometheus.Gauge seriesOps *prometheus.CounterVec ingestedSamplesCount prometheus.Counter @@ -254,6 +255,12 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage Name: "memory_series", Help: "The current number of series in memory.", }), + numHeadChunks: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "num_head_chunks_open", + Help: "The current number of open head chunks.", + }), dirtySeries: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, @@ -372,6 +379,16 @@ func (s *MemorySeriesStorage) Start() (err error) { log.Info("Loading series map and head chunks...") s.fpToSeries, s.numChunksToPersist, err = p.loadSeriesMapAndHeads() + openHeadCount := 0 + for fp := range s.fpToSeries.fpIter() { + if series, ok := s.fpToSeries.get(fp); ok { + if !series.headChunkClosed { + openHeadCount += 1 + } + } + } + s.numHeadChunks.Set(float64(openHeadCount)) + if err != nil { return err } @@ -977,6 +994,9 @@ func (s *MemorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Me } s.fpToSeries.put(fp, series) s.numSeries.Inc() + if !series.headChunkClosed { + s.numHeadChunks.Inc() + } } return series, nil } @@ -1383,6 +1403,7 @@ func (s *MemorySeriesStorage) maintainMemorySeries( } if closed { s.incNumChunksToPersist(1) + s.numHeadChunks.Dec() } seriesWasDirty := series.dirty @@ -1783,6 +1804,7 @@ func (s *MemorySeriesStorage) Describe(ch chan<- *prometheus.Desc) { ch <- maxChunksToPersistDesc ch <- numChunksToPersistDesc ch <- s.numSeries.Desc() + ch <- s.numHeadChunks.Desc() ch <- s.dirtySeries.Desc() s.seriesOps.Describe(ch) ch <- s.ingestedSamplesCount.Desc() @@ -1812,6 +1834,7 @@ func (s *MemorySeriesStorage) Collect(ch chan<- prometheus.Metric) { float64(s.getNumChunksToPersist()), ) ch <- s.numSeries + ch <- s.numHeadChunks ch <- s.dirtySeries s.seriesOps.Collect(ch) ch <- s.ingestedSamplesCount From 074050b8c053e65f383a9d59541191379bef1d42 Mon Sep 17 00:00:00 2001 From: Jeremy Meulemans Date: Thu, 16 Feb 2017 18:04:28 -0600 Subject: [PATCH 29/37] Updating for failed codeclimate check. --- storage/local/storage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/local/storage.go b/storage/local/storage.go index 81e1fd7306..3945509ec2 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -383,7 +383,7 @@ func (s *MemorySeriesStorage) Start() (err error) { for fp := range s.fpToSeries.fpIter() { if series, ok := s.fpToSeries.get(fp); ok { if !series.headChunkClosed { - openHeadCount += 1 + openHeadCount++ } } } From 025c828976b0ba92bb55a88dfee5a80572e72a78 Mon Sep 17 00:00:00 2001 From: Jeremy Meulemans Date: Fri, 17 Feb 2017 07:10:13 -0600 Subject: [PATCH 30/37] Changed to open_head_chunks to address review. Now incrementing numHeadChunks directly. 
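For reference, an illustration (added in this write-up, not part of the
commit) of how the renamed gauge should surface on /metrics, assuming
the package's usual namespace ("prometheus") and subsystem
("local_storage") constants; the sample value is arbitrary:

```
# HELP prometheus_local_storage_open_head_chunks The current number of open head chunks.
# TYPE prometheus_local_storage_open_head_chunks gauge
prometheus_local_storage_open_head_chunks 42
```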
---
 storage/local/storage.go | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/storage/local/storage.go b/storage/local/storage.go
index 3945509ec2..fa9001702a 100644
--- a/storage/local/storage.go
+++ b/storage/local/storage.go
@@ -258,7 +258,7 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage
 		numHeadChunks: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
-			Name:      "num_head_chunks_open",
+			Name:      "open_head_chunks",
 			Help:      "The current number of open head chunks.",
 		}),
 		dirtySeries: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
@@ -379,15 +379,13 @@ func (s *MemorySeriesStorage) Start() (err error) {
 	log.Info("Loading series map and head chunks...")
 	s.fpToSeries, s.numChunksToPersist, err = p.loadSeriesMapAndHeads()
 
-	openHeadCount := 0
 	for fp := range s.fpToSeries.fpIter() {
 		if series, ok := s.fpToSeries.get(fp); ok {
 			if !series.headChunkClosed {
-				openHeadCount++
+				s.numHeadChunks.Inc()
 			}
 		}
 	}
-	s.numHeadChunks.Set(float64(openHeadCount))
 
 	if err != nil {
 		return err

From 74a3b1051ab29f388c0184800fdea47d150d8215 Mon Sep 17 00:00:00 2001
From: Derek Marcotte
Date: Sat, 18 Feb 2017 05:03:32 -0500
Subject: [PATCH 31/37] Add rickshawGraph property to the Graph object. (#2438)

Add rickshawGraph property to the Graph object.

This allows further changes to be made to the graph, for example,
annotations.
---
 web/ui/static/js/prom_console.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/web/ui/static/js/prom_console.js b/web/ui/static/js/prom_console.js
index ed2fe8682a..2192c8b7e2 100644
--- a/web/ui/static/js/prom_console.js
+++ b/web/ui/static/js/prom_console.js
@@ -332,6 +332,9 @@ PromConsole.Graph = function(params) {
   this.params = params;
   this.rendered_data = null;
+  // Keep a reference so that further updates (e.g. annotations) can be made
+  // by the user in their templates.
+  this.rickshawGraph = null;
   PromConsole._graph_registry.push(this);
 
   /*
@@ -504,6 +507,8 @@ PromConsole.Graph.prototype._render = function(data) {
   xAxis.render();
   yAxis.render();
   graph.render();
+
+  this.rickshawGraph = graph;
 };
 
 PromConsole.Graph.prototype._clearGraph = function() {
@@ -513,6 +518,7 @@ PromConsole.Graph.prototype._clearGraph = function() {
   while (this.legendDiv.lastChild) {
     this.legendDiv.removeChild(this.legendDiv.lastChild);
   }
+  this.rickshawGraph = null;
 };
 
 PromConsole.Graph.prototype._xhrs = [];

From a1596f9c7915b143c168b1e18ef11e084f1b5a6a Mon Sep 17 00:00:00 2001
From: beorn7
Date: Thu, 16 Feb 2017 20:13:49 +0100
Subject: [PATCH 32/37] Replace AUTHORS.md by an updated MAINTAINERS.md

---
 AUTHORS.md      | 13 -------------
 CONTRIBUTING.md |  6 +++---
 MAINTAINERS.md  |  7 +++++++
 3 files changed, 10 insertions(+), 16 deletions(-)
 delete mode 100644 AUTHORS.md
 create mode 100644 MAINTAINERS.md

diff --git a/AUTHORS.md b/AUTHORS.md
deleted file mode 100644
index dddaa986a8..0000000000
--- a/AUTHORS.md
+++ /dev/null
@@ -1,13 +0,0 @@
-The Prometheus project was started by Matt T. Proud (emeritus) and
-Julius Volz in 2012.
-
-Maintainers of this repository:
-
-* Björn Rabenstein
-* Fabian Reinartz
-* Julius Volz
-
-More than [100 individuals][1] have contributed to this repository. Please
-refer to the Git commit log for a complete list.
-
-[1]: https://github.com/prometheus/prometheus/graphs/contributors

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5705f0fbea..dde3851b35 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,9 +2,9 @@
 
 Prometheus uses GitHub to manage reviews of pull requests.
-* If you have a trivial fix or improvement, go ahead and create a pull - request, addressing (with `@...`) one or more of the maintainers - (see [AUTHORS.md](AUTHORS.md)) in the description of the pull request. +* If you have a trivial fix or improvement, go ahead and create a pull request, + addressing (with `@...`) a suitable maintainer of this repository (see + [MAINTAINERS.md](MAINTAINERS.md)) in the description of the pull request. * If you plan to do something more involved, first discuss your ideas on our [mailing list](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). diff --git a/MAINTAINERS.md b/MAINTAINERS.md new file mode 100644 index 0000000000..c55b0530ef --- /dev/null +++ b/MAINTAINERS.md @@ -0,0 +1,7 @@ +Maintainers of this repository with their focus areas: + +* Björn Rabenstein : Local storage; general code-level issues. +* Brian Brazil : Console templates; semantics of PromQL, service discovery, and relabeling. +* Fabian Reinartz : PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery. +* Julius Volz : Remote storage integrations; web UI. + From a6d81a9f8879913d1ab271bea7596eb45656e699 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 20 Feb 2017 13:21:47 +0100 Subject: [PATCH 33/37] Update vendoring for github.com/prometheus/common/model --- vendor/github.com/prometheus/common/model/value.go | 5 +---- vendor/vendor.json | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/vendor/github.com/prometheus/common/model/value.go b/vendor/github.com/prometheus/common/model/value.go index 7728abaeea..c9ed3ffd82 100644 --- a/vendor/github.com/prometheus/common/model/value.go +++ b/vendor/github.com/prometheus/common/model/value.go @@ -129,11 +129,8 @@ func (s *Sample) Equal(o *Sample) bool { if !s.Timestamp.Equal(o.Timestamp) { return false } - if s.Value.Equal(o.Value) { - return false - } - return true + return s.Value.Equal(o.Value) } func (s Sample) String() string { diff --git a/vendor/vendor.json b/vendor/vendor.json index bbab8e6aaf..33f63e6498 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -560,8 +560,8 @@ { "checksumSHA1": "vopCLXHzYm+3l5fPKOf4/fQwrCM=", "path": "github.com/prometheus/common/model", - "revision": "dd2f054febf4a6c00f2343686efb775948a8bff4", - "revisionTime": "2017-01-08T23:12:12Z" + "revision": "3007b6072c17c8d985734e6e19b1dea9174e13d3", + "revisionTime": "2017-02-19T00:35:58+01:00" }, { "checksumSHA1": "ZbbESWBHHcPUJ/A5yrzKhTHuPc8=", From e9476b35d5b5c42a008007a89154181a2ed8c71b Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 13 Feb 2017 21:43:20 +0100 Subject: [PATCH 34/37] Re-add multiple remote writers Each remote write endpoint gets its own set of relabeling rules. This is based on the (yet-to-be-merged) https://github.com/prometheus/prometheus/pull/2419, which removes legacy remote write implementations. 
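As a sketch of the resulting configuration surface (mirroring the
testdata/conf.good.yml change below), remote_write now takes a list of
endpoints, each with its own optional write_relabel_configs:

```yaml
remote_write:
  - url: http://remote1/push
    write_relabel_configs:
      - source_labels: [__name__]
        regex: expensive.*
        action: drop
  - url: http://remote2/push
```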
--- config/config.go | 2 +- config/config_test.go | 33 +++++++++++---- config/testdata/conf.good.yml | 10 +++-- storage/remote/client.go | 8 ++-- storage/remote/queue_manager.go | 63 ++++++++++++++++++++-------- storage/remote/queue_manager_test.go | 36 +++++++++------- storage/remote/remote.go | 59 +++++++++----------------- 7 files changed, 122 insertions(+), 89 deletions(-) diff --git a/config/config.go b/config/config.go index ae00463299..93d4c67e41 100644 --- a/config/config.go +++ b/config/config.go @@ -204,7 +204,7 @@ type Config struct { RuleFiles []string `yaml:"rule_files,omitempty"` ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"` - RemoteWriteConfig RemoteWriteConfig `yaml:"remote_write,omitempty"` + RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"` // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` diff --git a/config/config_test.go b/config/config_test.go index a8c1f32e74..4be6989b7b 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -26,6 +26,14 @@ import ( "gopkg.in/yaml.v2" ) +func mustParseURL(u string) *URL { + parsed, err := url.Parse(u) + if err != nil { + panic(err) + } + return &URL{URL: parsed} +} + var expectedConf = &Config{ GlobalConfig: GlobalConfig{ ScrapeInterval: model.Duration(15 * time.Second), @@ -44,17 +52,24 @@ var expectedConf = &Config{ "testdata/my/*.rules", }, - RemoteWriteConfig: RemoteWriteConfig{ - RemoteTimeout: model.Duration(30 * time.Second), - WriteRelabelConfigs: []*RelabelConfig{ - { - SourceLabels: model.LabelNames{"__name__"}, - Separator: ";", - Regex: MustNewRegexp("expensive.*"), - Replacement: "$1", - Action: RelabelDrop, + RemoteWriteConfigs: []*RemoteWriteConfig{ + { + URL: mustParseURL("http://remote1/push"), + RemoteTimeout: model.Duration(30 * time.Second), + WriteRelabelConfigs: []*RelabelConfig{ + { + SourceLabels: model.LabelNames{"__name__"}, + Separator: ";", + Regex: MustNewRegexp("expensive.*"), + Replacement: "$1", + Action: RelabelDrop, + }, }, }, + { + URL: mustParseURL("http://remote2/push"), + RemoteTimeout: model.Duration(30 * time.Second), + }, }, ScrapeConfigs: []*ScrapeConfig{ diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index a3aca858a4..7fc6161138 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -14,10 +14,12 @@ rule_files: - "my/*.rules" remote_write: - write_relabel_configs: - - source_labels: [__name__] - regex: expensive.* - action: drop + - url: http://remote1/push + write_relabel_configs: + - source_labels: [__name__] + regex: expensive.* + action: drop + - url: http://remote2/push scrape_configs: - job_name: prometheus diff --git a/storage/remote/client.go b/storage/remote/client.go index 771ecb696c..e5d97feda1 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -31,13 +31,14 @@ import ( // Client allows sending batches of Prometheus samples to an HTTP endpoint. type Client struct { + index int // Used to differentiate metrics. url config.URL client *http.Client timeout time.Duration } // NewClient creates a new Client. 
-func NewClient(conf config.RemoteWriteConfig) (*Client, error) { +func NewClient(index int, conf *config.RemoteWriteConfig) (*Client, error) { tlsConfig, err := httputil.NewTLSConfig(conf.TLSConfig) if err != nil { return nil, err @@ -55,6 +56,7 @@ func NewClient(conf config.RemoteWriteConfig) (*Client, error) { } return &Client{ + index: index, url: *conf.URL, client: httputil.NewClient(rt), timeout: time.Duration(conf.RemoteTimeout), @@ -114,7 +116,7 @@ func (c *Client) Store(samples model.Samples) error { return nil } -// Name identifies the client as a generic client. +// Name identifies the client. func (c Client) Name() string { - return "generic" + return fmt.Sprintf("%d:%s", c.index, c.url) } diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 87fa5f01cb..b7196f156c 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -20,6 +20,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/relabel" ) // String constants for instrumentation. @@ -27,6 +29,12 @@ const ( namespace = "prometheus" subsystem = "remote_storage" queue = "queue" + + defaultShards = 10 + defaultMaxSamplesPerSend = 100 + // The queue capacity is per shard. + defaultQueueCapacity = 100 * 1024 / defaultShards + defaultBatchSendDeadline = 5 * time.Second ) var ( @@ -105,25 +113,21 @@ type StorageClient interface { Name() string } +// StorageQueueManagerConfig configures a storage queue. type StorageQueueManagerConfig struct { QueueCapacity int // Number of samples to buffer per shard before we start dropping them. Shards int // Number of shards, i.e. amount of concurrency. MaxSamplesPerSend int // Maximum number of samples per send. BatchSendDeadline time.Duration // Maximum time sample will wait in buffer. -} - -var defaultConfig = StorageQueueManagerConfig{ - QueueCapacity: 100 * 1024 / 10, - Shards: 10, - MaxSamplesPerSend: 100, - BatchSendDeadline: 5 * time.Second, + ExternalLabels model.LabelSet + RelabelConfigs []*config.RelabelConfig + Client StorageClient } // StorageQueueManager manages a queue of samples to be sent to the Storage // indicated by the provided StorageClient. type StorageQueueManager struct { cfg StorageQueueManagerConfig - tsdb StorageClient shards []chan *model.Sample wg sync.WaitGroup done chan struct{} @@ -131,9 +135,18 @@ type StorageQueueManager struct { } // NewStorageQueueManager builds a new StorageQueueManager. 
-func NewStorageQueueManager(tsdb StorageClient, cfg *StorageQueueManagerConfig) *StorageQueueManager { - if cfg == nil { - cfg = &defaultConfig +func NewStorageQueueManager(cfg StorageQueueManagerConfig) *StorageQueueManager { + if cfg.QueueCapacity == 0 { + cfg.QueueCapacity = defaultQueueCapacity + } + if cfg.Shards == 0 { + cfg.Shards = defaultShards + } + if cfg.MaxSamplesPerSend == 0 { + cfg.MaxSamplesPerSend = defaultMaxSamplesPerSend + } + if cfg.BatchSendDeadline == 0 { + cfg.BatchSendDeadline = defaultBatchSendDeadline } shards := make([]chan *model.Sample, cfg.Shards) @@ -142,11 +155,10 @@ func NewStorageQueueManager(tsdb StorageClient, cfg *StorageQueueManagerConfig) } t := &StorageQueueManager{ - cfg: *cfg, - tsdb: tsdb, + cfg: cfg, shards: shards, done: make(chan struct{}), - queueName: tsdb.Name(), + queueName: cfg.Client.Name(), } queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.QueueCapacity)) @@ -158,11 +170,28 @@ func NewStorageQueueManager(tsdb StorageClient, cfg *StorageQueueManagerConfig) // sample on the floor if the queue is full. // Always returns nil. func (t *StorageQueueManager) Append(s *model.Sample) error { - fp := s.Metric.FastFingerprint() + var snew model.Sample + snew = *s + snew.Metric = s.Metric.Clone() + + for ln, lv := range t.cfg.ExternalLabels { + if _, ok := s.Metric[ln]; !ok { + snew.Metric[ln] = lv + } + } + + snew.Metric = model.Metric( + relabel.Process(model.LabelSet(snew.Metric), t.cfg.RelabelConfigs...)) + + if snew.Metric == nil { + return nil + } + + fp := snew.Metric.FastFingerprint() shard := uint64(fp) % uint64(t.cfg.Shards) select { - case t.shards[shard] <- s: + case t.shards[shard] <- &snew: queueLength.WithLabelValues(t.queueName).Inc() default: droppedSamplesTotal.WithLabelValues(t.queueName).Inc() @@ -239,7 +268,7 @@ func (t *StorageQueueManager) sendSamples(s model.Samples) { // sample isn't sent correctly the first time, it's simply dropped on the // floor. begin := time.Now() - err := t.tsdb.Store(s) + err := t.cfg.Client.Store(s) duration := time.Since(begin).Seconds() if err != nil { diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index c4d1dbd8b0..3908a5a02f 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -81,9 +81,7 @@ func (c *TestStorageClient) Name() string { func TestSampleDelivery(t *testing.T) { // Let's create an even number of send batches so we don't run into the // batch timeout case. - cfg := defaultConfig - n := cfg.QueueCapacity * 2 - cfg.Shards = 1 + n := defaultQueueCapacity * 2 samples := make(model.Samples, 0, n) for i := 0; i < n; i++ { @@ -98,7 +96,11 @@ func TestSampleDelivery(t *testing.T) { c := NewTestStorageClient() c.expectSamples(samples[:len(samples)/2]) - m := NewStorageQueueManager(c, &cfg) + + m := NewStorageQueueManager(StorageQueueManagerConfig{ + Client: c, + Shards: 1, + }) // These should be received by the client. for _, s := range samples[:len(samples)/2] { @@ -115,11 +117,8 @@ func TestSampleDelivery(t *testing.T) { } func TestSampleDeliveryOrder(t *testing.T) { - cfg := defaultConfig ts := 10 - n := cfg.MaxSamplesPerSend * ts - // Ensure we don't drop samples in this test. 
- cfg.QueueCapacity = n + n := defaultMaxSamplesPerSend * ts samples := make(model.Samples, 0, n) for i := 0; i < n; i++ { @@ -135,7 +134,11 @@ func TestSampleDeliveryOrder(t *testing.T) { c := NewTestStorageClient() c.expectSamples(samples) - m := NewStorageQueueManager(c, &cfg) + m := NewStorageQueueManager(StorageQueueManagerConfig{ + Client: c, + // Ensure we don't drop samples in this test. + QueueCapacity: n, + }) // These should be received by the client. for _, s := range samples { @@ -194,9 +197,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) { // `MaxSamplesPerSend*Shards` samples should be consumed by the // per-shard goroutines, and then another `MaxSamplesPerSend` // should be left on the queue. - cfg := defaultConfig - n := cfg.MaxSamplesPerSend*cfg.Shards + cfg.MaxSamplesPerSend - cfg.QueueCapacity = n + n := defaultMaxSamplesPerSend*defaultShards + defaultMaxSamplesPerSend samples := make(model.Samples, 0, n) for i := 0; i < n; i++ { @@ -210,7 +211,10 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) { } c := NewTestBlockedStorageClient() - m := NewStorageQueueManager(c, &cfg) + m := NewStorageQueueManager(StorageQueueManagerConfig{ + Client: c, + QueueCapacity: n, + }) m.Start() @@ -239,14 +243,14 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) { time.Sleep(10 * time.Millisecond) } - if m.queueLen() != cfg.MaxSamplesPerSend { + if m.queueLen() != defaultMaxSamplesPerSend { t.Fatalf("Failed to drain StorageQueueManager queue, %d elements left", m.queueLen(), ) } numCalls := c.NumCalls() - if numCalls != uint64(cfg.Shards) { - t.Errorf("Saw %d concurrent sends, expected %d", numCalls, cfg.Shards) + if numCalls != uint64(defaultShards) { + t.Errorf("Saw %d concurrent sends, expected %d", numCalls, defaultShards) } } diff --git a/storage/remote/remote.go b/storage/remote/remote.go index 1fcb0cded2..2f9f58efaf 100644 --- a/storage/remote/remote.go +++ b/storage/remote/remote.go @@ -19,16 +19,12 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/relabel" ) // Storage allows queueing samples for remote writes. type Storage struct { - mtx sync.RWMutex - externalLabels model.LabelSet - conf config.RemoteWriteConfig - - queue *StorageQueueManager + mtx sync.RWMutex + queues []*StorageQueueManager } // ApplyConfig updates the state as the new config requires. @@ -36,34 +32,36 @@ func (s *Storage) ApplyConfig(conf *config.Config) error { s.mtx.Lock() defer s.mtx.Unlock() + newQueues := []*StorageQueueManager{} // TODO: we should only stop & recreate queues which have changes, // as this can be quite disruptive. 
- var newQueue *StorageQueueManager - - if conf.RemoteWriteConfig.URL != nil { - c, err := NewClient(conf.RemoteWriteConfig) + for i, rwConf := range conf.RemoteWriteConfigs { + c, err := NewClient(i, rwConf) if err != nil { return err } - newQueue = NewStorageQueueManager(c, nil) + newQueues = append(newQueues, NewStorageQueueManager(StorageQueueManagerConfig{ + Client: c, + ExternalLabels: conf.GlobalConfig.ExternalLabels, + RelabelConfigs: rwConf.WriteRelabelConfigs, + })) } - if s.queue != nil { - s.queue.Stop() + for _, q := range s.queues { + q.Stop() } - s.queue = newQueue - s.conf = conf.RemoteWriteConfig - s.externalLabels = conf.GlobalConfig.ExternalLabels - if s.queue != nil { - s.queue.Start() + + s.queues = newQueues + for _, q := range s.queues { + q.Start() } return nil } // Stop the background processing of the storage queues. func (s *Storage) Stop() { - if s.queue != nil { - s.queue.Stop() + for _, q := range s.queues { + q.Stop() } } @@ -72,26 +70,9 @@ func (s *Storage) Append(smpl *model.Sample) error { s.mtx.RLock() defer s.mtx.RUnlock() - if s.queue == nil { - return nil + for _, q := range s.queues { + q.Append(smpl) } - - var snew model.Sample - snew = *smpl - snew.Metric = smpl.Metric.Clone() - - for ln, lv := range s.externalLabels { - if _, ok := smpl.Metric[ln]; !ok { - snew.Metric[ln] = lv - } - } - snew.Metric = model.Metric( - relabel.Process(model.LabelSet(snew.Metric), s.conf.WriteRelabelConfigs...)) - - if snew.Metric == nil { - return nil - } - s.queue.Append(&snew) return nil } From 2f39dbc8b3598805a34fa220f136d90e2933d284 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 21 Feb 2017 21:45:43 +0100 Subject: [PATCH 35/37] Rename StorageQueueManager -> QueueManager --- storage/remote/queue_manager.go | 28 ++++++++++++++-------------- storage/remote/queue_manager_test.go | 10 +++++----- storage/remote/remote.go | 6 +++--- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index b7196f156c..0345498016 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -113,8 +113,8 @@ type StorageClient interface { Name() string } -// StorageQueueManagerConfig configures a storage queue. -type StorageQueueManagerConfig struct { +// QueueManagerConfig configures a storage queue. +type QueueManagerConfig struct { QueueCapacity int // Number of samples to buffer per shard before we start dropping them. Shards int // Number of shards, i.e. amount of concurrency. MaxSamplesPerSend int // Maximum number of samples per send. @@ -124,18 +124,18 @@ type StorageQueueManagerConfig struct { Client StorageClient } -// StorageQueueManager manages a queue of samples to be sent to the Storage +// QueueManager manages a queue of samples to be sent to the Storage // indicated by the provided StorageClient. -type StorageQueueManager struct { - cfg StorageQueueManagerConfig +type QueueManager struct { + cfg QueueManagerConfig shards []chan *model.Sample wg sync.WaitGroup done chan struct{} queueName string } -// NewStorageQueueManager builds a new StorageQueueManager. -func NewStorageQueueManager(cfg StorageQueueManagerConfig) *StorageQueueManager { +// NewQueueManager builds a new QueueManager. 
+func NewQueueManager(cfg QueueManagerConfig) *QueueManager { if cfg.QueueCapacity == 0 { cfg.QueueCapacity = defaultQueueCapacity } @@ -154,7 +154,7 @@ func NewStorageQueueManager(cfg StorageQueueManagerConfig) *StorageQueueManager shards[i] = make(chan *model.Sample, cfg.QueueCapacity) } - t := &StorageQueueManager{ + t := &QueueManager{ cfg: cfg, shards: shards, done: make(chan struct{}), @@ -169,7 +169,7 @@ func NewStorageQueueManager(cfg StorageQueueManagerConfig) *StorageQueueManager // Append queues a sample to be sent to the remote storage. It drops the // sample on the floor if the queue is full. // Always returns nil. -func (t *StorageQueueManager) Append(s *model.Sample) error { +func (t *QueueManager) Append(s *model.Sample) error { var snew model.Sample snew = *s snew.Metric = s.Metric.Clone() @@ -203,13 +203,13 @@ func (t *StorageQueueManager) Append(s *model.Sample) error { // NeedsThrottling implements storage.SampleAppender. It will always return // false as a remote storage drops samples on the floor if backlogging instead // of asking for throttling. -func (*StorageQueueManager) NeedsThrottling() bool { +func (*QueueManager) NeedsThrottling() bool { return false } // Start the queue manager sending samples to the remote storage. // Does not block. -func (t *StorageQueueManager) Start() { +func (t *QueueManager) Start() { for i := 0; i < t.cfg.Shards; i++ { go t.runShard(i) } @@ -217,7 +217,7 @@ func (t *StorageQueueManager) Start() { // Stop stops sending samples to the remote storage and waits for pending // sends to complete. -func (t *StorageQueueManager) Stop() { +func (t *QueueManager) Stop() { log.Infof("Stopping remote storage...") for _, shard := range t.shards { close(shard) @@ -226,7 +226,7 @@ func (t *StorageQueueManager) Stop() { log.Info("Remote storage stopped.") } -func (t *StorageQueueManager) runShard(i int) { +func (t *QueueManager) runShard(i int) { defer t.wg.Done() shard := t.shards[i] @@ -263,7 +263,7 @@ func (t *StorageQueueManager) runShard(i int) { } } -func (t *StorageQueueManager) sendSamples(s model.Samples) { +func (t *QueueManager) sendSamples(s model.Samples) { // Samples are sent to the remote storage on a best-effort basis. If a // sample isn't sent correctly the first time, it's simply dropped on the // floor. diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 3908a5a02f..c843707ea2 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -97,7 +97,7 @@ func TestSampleDelivery(t *testing.T) { c := NewTestStorageClient() c.expectSamples(samples[:len(samples)/2]) - m := NewStorageQueueManager(StorageQueueManagerConfig{ + m := NewQueueManager(QueueManagerConfig{ Client: c, Shards: 1, }) @@ -134,7 +134,7 @@ func TestSampleDeliveryOrder(t *testing.T) { c := NewTestStorageClient() c.expectSamples(samples) - m := NewStorageQueueManager(StorageQueueManagerConfig{ + m := NewQueueManager(QueueManagerConfig{ Client: c, // Ensure we don't drop samples in this test. 
QueueCapacity: n, @@ -184,7 +184,7 @@ func (c *TestBlockingStorageClient) Name() string { return "testblockingstorageclient" } -func (t *StorageQueueManager) queueLen() int { +func (t *QueueManager) queueLen() int { queueLength := 0 for _, shard := range t.shards { queueLength += len(shard) @@ -211,7 +211,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) { } c := NewTestBlockedStorageClient() - m := NewStorageQueueManager(StorageQueueManagerConfig{ + m := NewQueueManager(QueueManagerConfig{ Client: c, QueueCapacity: n, }) @@ -244,7 +244,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) { } if m.queueLen() != defaultMaxSamplesPerSend { - t.Fatalf("Failed to drain StorageQueueManager queue, %d elements left", + t.Fatalf("Failed to drain QueueManager queue, %d elements left", m.queueLen(), ) } diff --git a/storage/remote/remote.go b/storage/remote/remote.go index 2f9f58efaf..a53f866b3d 100644 --- a/storage/remote/remote.go +++ b/storage/remote/remote.go @@ -24,7 +24,7 @@ import ( // Storage allows queueing samples for remote writes. type Storage struct { mtx sync.RWMutex - queues []*StorageQueueManager + queues []*QueueManager } // ApplyConfig updates the state as the new config requires. @@ -32,7 +32,7 @@ func (s *Storage) ApplyConfig(conf *config.Config) error { s.mtx.Lock() defer s.mtx.Unlock() - newQueues := []*StorageQueueManager{} + newQueues := []*QueueManager{} // TODO: we should only stop & recreate queues which have changes, // as this can be quite disruptive. for i, rwConf := range conf.RemoteWriteConfigs { @@ -40,7 +40,7 @@ func (s *Storage) ApplyConfig(conf *config.Config) error { if err != nil { return err } - newQueues = append(newQueues, NewStorageQueueManager(StorageQueueManagerConfig{ + newQueues = append(newQueues, NewQueueManager(QueueManagerConfig{ Client: c, ExternalLabels: conf.GlobalConfig.ExternalLabels, RelabelConfigs: rwConf.WriteRelabelConfigs, From 1ab893c6ec9cb466e12a53e3d732ac6a12b6e4be Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Thu, 23 Feb 2017 18:20:39 +0000 Subject: [PATCH 36/37] Limit 'discarding sample' logs to 1 every 10s (#2446) * Limit 'discarding sample' logs to 1 every 10s * Include the vendored library * Review feedback --- storage/remote/queue_manager.go | 28 +- vendor/golang.org/x/time/LICENSE | 27 ++ vendor/golang.org/x/time/PATENTS | 22 ++ vendor/golang.org/x/time/rate/rate.go | 371 ++++++++++++++++++++++++++ vendor/vendor.json | 6 + 5 files changed, 444 insertions(+), 10 deletions(-) create mode 100644 vendor/golang.org/x/time/LICENSE create mode 100644 vendor/golang.org/x/time/PATENTS create mode 100644 vendor/golang.org/x/time/rate/rate.go diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 0345498016..a9c6eb4602 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -17,6 +17,8 @@ import ( "sync" "time" + "golang.org/x/time/rate" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" @@ -35,6 +37,8 @@ const ( // The queue capacity is per shard. defaultQueueCapacity = 100 * 1024 / defaultShards defaultBatchSendDeadline = 5 * time.Second + logRateLimit = 0.1 // Limit to 1 log event every 10s + logBurst = 10 ) var ( @@ -127,11 +131,12 @@ type QueueManagerConfig struct { // QueueManager manages a queue of samples to be sent to the Storage // indicated by the provided StorageClient. 
type QueueManager struct { - cfg QueueManagerConfig - shards []chan *model.Sample - wg sync.WaitGroup - done chan struct{} - queueName string + cfg QueueManagerConfig + shards []chan *model.Sample + wg sync.WaitGroup + done chan struct{} + queueName string + logLimiter *rate.Limiter } // NewQueueManager builds a new QueueManager. @@ -155,10 +160,11 @@ func NewQueueManager(cfg QueueManagerConfig) *QueueManager { } t := &QueueManager{ - cfg: cfg, - shards: shards, - done: make(chan struct{}), - queueName: cfg.Client.Name(), + cfg: cfg, + shards: shards, + done: make(chan struct{}), + queueName: cfg.Client.Name(), + logLimiter: rate.NewLimiter(logRateLimit, logBurst), } queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.QueueCapacity)) @@ -195,7 +201,9 @@ func (t *QueueManager) Append(s *model.Sample) error { queueLength.WithLabelValues(t.queueName).Inc() default: droppedSamplesTotal.WithLabelValues(t.queueName).Inc() - log.Warn("Remote storage queue full, discarding sample.") + if t.logLimiter.Allow() { + log.Warn("Remote storage queue full, discarding sample. Multiple subsequent messages of this kind may be suppressed.") + } } return nil } diff --git a/vendor/golang.org/x/time/LICENSE b/vendor/golang.org/x/time/LICENSE new file mode 100644 index 0000000000..6a66aea5ea --- /dev/null +++ b/vendor/golang.org/x/time/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/time/PATENTS b/vendor/golang.org/x/time/PATENTS new file mode 100644 index 0000000000..733099041f --- /dev/null +++ b/vendor/golang.org/x/time/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. 
+ +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/time/rate/rate.go b/vendor/golang.org/x/time/rate/rate.go new file mode 100644 index 0000000000..938feaffe9 --- /dev/null +++ b/vendor/golang.org/x/time/rate/rate.go @@ -0,0 +1,371 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rate provides a rate limiter. +package rate + +import ( + "fmt" + "math" + "sync" + "time" + + "golang.org/x/net/context" +) + +// Limit defines the maximum frequency of some events. +// Limit is represented as number of events per second. +// A zero Limit allows no events. +type Limit float64 + +// Inf is the infinite rate limit; it allows all events (even if burst is zero). +const Inf = Limit(math.MaxFloat64) + +// Every converts a minimum time interval between events to a Limit. +func Every(interval time.Duration) Limit { + if interval <= 0 { + return Inf + } + return 1 / Limit(interval.Seconds()) +} + +// A Limiter controls how frequently events are allowed to happen. +// It implements a "token bucket" of size b, initially full and refilled +// at rate r tokens per second. +// Informally, in any large enough time interval, the Limiter limits the +// rate to r tokens per second, with a maximum burst size of b events. +// As a special case, if r == Inf (the infinite rate), b is ignored. +// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. +// +// The zero value is a valid Limiter, but it will reject all events. +// Use NewLimiter to create non-zero Limiters. +// +// Limiter has three main methods, Allow, Reserve, and Wait. +// Most callers should use Wait. +// +// Each of the three methods consumes a single token. +// They differ in their behavior when no token is available. +// If no token is available, Allow returns false. +// If no token is available, Reserve returns a reservation for a future token +// and the amount of time the caller must wait before using it. +// If no token is available, Wait blocks until one can be obtained +// or its associated context.Context is canceled. +// +// The methods AllowN, ReserveN, and WaitN consume n tokens. 
+type Limiter struct { + limit Limit + burst int + + mu sync.Mutex + tokens float64 + // last is the last time the limiter's tokens field was updated + last time.Time + // lastEvent is the latest time of a rate-limited event (past or future) + lastEvent time.Time +} + +// Limit returns the maximum overall event rate. +func (lim *Limiter) Limit() Limit { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.limit +} + +// Burst returns the maximum burst size. Burst is the maximum number of tokens +// that can be consumed in a single call to Allow, Reserve, or Wait, so higher +// Burst values allow more events to happen at once. +// A zero Burst allows no events, unless limit == Inf. +func (lim *Limiter) Burst() int { + return lim.burst +} + +// NewLimiter returns a new Limiter that allows events up to rate r and permits +// bursts of at most b tokens. +func NewLimiter(r Limit, b int) *Limiter { + return &Limiter{ + limit: r, + burst: b, + } +} + +// Allow is shorthand for AllowN(time.Now(), 1). +func (lim *Limiter) Allow() bool { + return lim.AllowN(time.Now(), 1) +} + +// AllowN reports whether n events may happen at time now. +// Use this method if you intend to drop / skip events that exceed the rate limit. +// Otherwise use Reserve or Wait. +func (lim *Limiter) AllowN(now time.Time, n int) bool { + return lim.reserveN(now, n, 0).ok +} + +// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. +// A Reservation may be canceled, which may enable the Limiter to permit additional events. +type Reservation struct { + ok bool + lim *Limiter + tokens int + timeToAct time.Time + // This is the Limit at reservation time, it can change later. + limit Limit +} + +// OK returns whether the limiter can provide the requested number of tokens +// within the maximum wait time. If OK is false, Delay returns InfDuration, and +// Cancel does nothing. +func (r *Reservation) OK() bool { + return r.ok +} + +// Delay is shorthand for DelayFrom(time.Now()). +func (r *Reservation) Delay() time.Duration { + return r.DelayFrom(time.Now()) +} + +// InfDuration is the duration returned by Delay when a Reservation is not OK. +const InfDuration = time.Duration(1<<63 - 1) + +// DelayFrom returns the duration for which the reservation holder must wait +// before taking the reserved action. Zero duration means act immediately. +// InfDuration means the limiter cannot grant the tokens requested in this +// Reservation within the maximum wait time. +func (r *Reservation) DelayFrom(now time.Time) time.Duration { + if !r.ok { + return InfDuration + } + delay := r.timeToAct.Sub(now) + if delay < 0 { + return 0 + } + return delay +} + +// Cancel is shorthand for CancelAt(time.Now()). +func (r *Reservation) Cancel() { + r.CancelAt(time.Now()) + return +} + +// CancelAt indicates that the reservation holder will not perform the reserved action +// and reverses the effects of this Reservation on the rate limit as much as possible, +// considering that other reservations may have already been made. +func (r *Reservation) CancelAt(now time.Time) { + if !r.ok { + return + } + + r.lim.mu.Lock() + defer r.lim.mu.Unlock() + + if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { + return + } + + // calculate tokens to restore + // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved + // after r was obtained. These tokens should not be restored. 
+ restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) + if restoreTokens <= 0 { + return + } + // advance time to now + now, _, tokens := r.lim.advance(now) + // calculate new number of tokens + tokens += restoreTokens + if burst := float64(r.lim.burst); tokens > burst { + tokens = burst + } + // update state + r.lim.last = now + r.lim.tokens = tokens + if r.timeToAct == r.lim.lastEvent { + prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) + if !prevEvent.Before(now) { + r.lim.lastEvent = prevEvent + } + } + + return +} + +// Reserve is shorthand for ReserveN(time.Now(), 1). +func (lim *Limiter) Reserve() *Reservation { + return lim.ReserveN(time.Now(), 1) +} + +// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// The Limiter takes this Reservation into account when allowing future events. +// ReserveN returns false if n exceeds the Limiter's burst size. +// Usage example: +// r := lim.ReserveN(time.Now(), 1) +// if !r.OK() { +// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? +// return +// } +// time.Sleep(r.Delay()) +// Act() +// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. +// If you need to respect a deadline or cancel the delay, use Wait instead. +// To drop or skip events exceeding rate limit, use Allow instead. +func (lim *Limiter) ReserveN(now time.Time, n int) *Reservation { + r := lim.reserveN(now, n, InfDuration) + return &r +} + +// Wait is shorthand for WaitN(ctx, 1). +func (lim *Limiter) Wait(ctx context.Context) (err error) { + return lim.WaitN(ctx, 1) +} + +// WaitN blocks until lim permits n events to happen. +// It returns an error if n exceeds the Limiter's burst size, the Context is +// canceled, or the expected wait time exceeds the Context's Deadline. +// The burst limit is ignored if the rate limit is Inf. +func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { + if n > lim.burst && lim.limit != Inf { + return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's burst %d", n, lim.burst) + } + // Check if ctx is already cancelled + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + // Determine wait limit + now := time.Now() + waitLimit := InfDuration + if deadline, ok := ctx.Deadline(); ok { + waitLimit = deadline.Sub(now) + } + // Reserve + r := lim.reserveN(now, n, waitLimit) + if !r.ok { + return fmt.Errorf("rate: Wait(n=%d) would exceed context deadline", n) + } + // Wait + t := time.NewTimer(r.DelayFrom(now)) + defer t.Stop() + select { + case <-t.C: + // We can proceed. + return nil + case <-ctx.Done(): + // Context was canceled before we could proceed. Cancel the + // reservation, which may permit other events to proceed sooner. + r.Cancel() + return ctx.Err() + } +} + +// SetLimit is shorthand for SetLimitAt(time.Now(), newLimit). +func (lim *Limiter) SetLimit(newLimit Limit) { + lim.SetLimitAt(time.Now(), newLimit) +} + +// SetLimitAt sets a new Limit for the limiter. The new Limit, and Burst, may be violated +// or underutilized by those which reserved (using Reserve or Wait) but did not yet act +// before SetLimitAt was called. +func (lim *Limiter) SetLimitAt(now time.Time, newLimit Limit) { + lim.mu.Lock() + defer lim.mu.Unlock() + + now, _, tokens := lim.advance(now) + + lim.last = now + lim.tokens = tokens + lim.limit = newLimit +} + +// reserveN is a helper method for AllowN, ReserveN, and WaitN. 
+// maxFutureReserve specifies the maximum reservation wait duration allowed. +// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. +func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { + lim.mu.Lock() + + if lim.limit == Inf { + lim.mu.Unlock() + return Reservation{ + ok: true, + lim: lim, + tokens: n, + timeToAct: now, + } + } + + now, last, tokens := lim.advance(now) + + // Calculate the remaining number of tokens resulting from the request. + tokens -= float64(n) + + // Calculate the wait duration + var waitDuration time.Duration + if tokens < 0 { + waitDuration = lim.limit.durationFromTokens(-tokens) + } + + // Decide result + ok := n <= lim.burst && waitDuration <= maxFutureReserve + + // Prepare reservation + r := Reservation{ + ok: ok, + lim: lim, + limit: lim.limit, + } + if ok { + r.tokens = n + r.timeToAct = now.Add(waitDuration) + } + + // Update state + if ok { + lim.last = now + lim.tokens = tokens + lim.lastEvent = r.timeToAct + } else { + lim.last = last + } + + lim.mu.Unlock() + return r +} + +// advance calculates and returns an updated state for lim resulting from the passage of time. +// lim is not changed. +func (lim *Limiter) advance(now time.Time) (newNow time.Time, newLast time.Time, newTokens float64) { + last := lim.last + if now.Before(last) { + last = now + } + + // Avoid making delta overflow below when last is very old. + maxElapsed := lim.limit.durationFromTokens(float64(lim.burst) - lim.tokens) + elapsed := now.Sub(last) + if elapsed > maxElapsed { + elapsed = maxElapsed + } + + // Calculate the new number of tokens, due to time that passed. + delta := lim.limit.tokensFromDuration(elapsed) + tokens := lim.tokens + delta + if burst := float64(lim.burst); tokens > burst { + tokens = burst + } + + return now, last, tokens +} + +// durationFromTokens is a unit conversion function from the number of tokens to the duration +// of time it takes to accumulate them at a rate of limit tokens per second. +func (limit Limit) durationFromTokens(tokens float64) time.Duration { + seconds := tokens / float64(limit) + return time.Nanosecond * time.Duration(1e9*seconds) +} + +// tokensFromDuration is a unit conversion function from a time duration to the number of tokens +// which could be accumulated during that duration at a rate of limit tokens per second. +func (limit Limit) tokensFromDuration(d time.Duration) float64 { + return d.Seconds() * float64(limit) +} diff --git a/vendor/vendor.json b/vendor/vendor.json index 33f63e6498..c33c029cb1 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -867,6 +867,12 @@ "revision": "c589d0c9f0d81640c518354c7bcae77d99820aa3", "revisionTime": "2016-09-30T00:14:02Z" }, + { + "checksumSHA1": "eFQDEix/mGnhwnFu/Hq63zMfrX8=", + "path": "golang.org/x/time/rate", + "revision": "f51c12702a4d776e4c1fa9b0fabab841babae631", + "revisionTime": "2016-10-28T04:02:39Z" + }, { "checksumSHA1": "AjdmRXf0fiy6Bec9mNlsGsmZi1k=", "path": "google.golang.org/api/compute/v1", From f152ac5e23943fd22243896533df626ba89f3d54 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 27 Feb 2017 20:31:16 +0100 Subject: [PATCH 37/37] notifier: Allow swapping out HTTP Doer We need to be able to modify the HTTP POST in Weave Cortex to add multitenancy information to a notification. Since we only really need a special header in the end, the other option would be to just allow passing in headers to the notifier. 
But swapping out the whole Doer is more general and allows others to swap out the network-talky bits of the notifier for their own use. Doing this via contexts here wouldn't work well, due to the decoupled flow of data in the notifier. There was no existing interface containing the ctxhttp.Post() or ctxhttp.Do() methods, so I settled on just using Do() as a swappable function directly (and with a more minimal signature than Post). --- notifier/notifier.go | 13 ++++++++++++- notifier/notifier_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/notifier/notifier.go b/notifier/notifier.go index b896d53a40..e9af00a4ca 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -77,12 +77,18 @@ type Options struct { QueueCapacity int ExternalLabels model.LabelSet RelabelConfigs []*config.RelabelConfig + // Used for sending HTTP requests to the Alertmanager. + Do func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) } // New constructs a new Notifier. func New(o *Options) *Notifier { ctx, cancel := context.WithCancel(context.Background()) + if o.Do == nil { + o.Do = ctxhttp.Do + } + return &Notifier{ queue: make(model.Alerts, 0, o.QueueCapacity), ctx: ctx, @@ -351,7 +357,12 @@ func (n *Notifier) sendAll(alerts ...*model.Alert) bool { } func (n *Notifier) sendOne(ctx context.Context, c *http.Client, url string, b []byte) error { - resp, err := ctxhttp.Post(ctx, c, url, contentTypeJSON, bytes.NewReader(b)) + req, err := http.NewRequest("POST", url, bytes.NewReader(b)) + if err != nil { + return err + } + req.Header.Set("Content-Type", contentTypeJSON) + resp, err := n.opts.Do(ctx, c, req) if err != nil { return err } diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index 65c49a3fad..3587a1a8a7 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -16,12 +16,15 @@ package notifier import ( "encoding/json" "fmt" + "io/ioutil" "net/http" "net/http/httptest" "reflect" "testing" "time" + "golang.org/x/net/context" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" ) @@ -191,6 +194,37 @@ func TestHandlerSendAll(t *testing.T) { } } +func TestCustomDo(t *testing.T) { + const testURL = "http://testurl.com/" + const testBody = "testbody" + + var received bool + h := New(&Options{ + Do: func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) { + received = true + body, err := ioutil.ReadAll(req.Body) + if err != nil { + t.Fatalf("Unable to read request body: %v", err) + } + if string(body) != testBody { + t.Fatalf("Unexpected body; want %v, got %v", testBody, string(body)) + } + if req.URL.String() != testURL { + t.Fatalf("Unexpected URL; want %v, got %v", testURL, req.URL.String()) + } + return &http.Response{ + Body: ioutil.NopCloser(nil), + }, nil + }, + }) + + h.sendOne(context.Background(), nil, testURL, []byte(testBody)) + + if !received { + t.Fatal("Expected to receive an alert, but didn't") + } +} + func TestExternalLabels(t *testing.T) { h := New(&Options{ QueueCapacity: 3 * maxBatchSize,