From 048f81218ddfc03bd33c9a7346bfd0ea72f498be Mon Sep 17 00:00:00 2001 From: Nevill Date: Fri, 22 Feb 2019 17:24:19 +0800 Subject: [PATCH] Change prometheus_sd_configs_failed_total to Gauge Signed-off-by: Nevill --- discovery/manager.go | 11 +++-- discovery/manager_test.go | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/discovery/manager.go b/discovery/manager.go index 4625e42a31..00293ca66e 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -41,10 +41,10 @@ import ( ) var ( - failedConfigs = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_configs_failed_total", - Help: "Total number of service discovery configurations that failed to load.", + failedConfigs = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_sd_failed_configs", + Help: "Current number of service discovery configurations that failed to load.", }, []string{"name"}, ) @@ -194,6 +194,9 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e m.targets = make(map[poolKey]map[string]*targetgroup.Group) m.providers = nil m.discoverCancel = nil + + failedConfigs.WithLabelValues(m.name).Set(0) + for name, scfg := range cfg { m.registerProviders(scfg, name) discoveredTargets.WithLabelValues(m.name, name).Set(0) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index b2bff1fc40..9e5b229fb4 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -25,6 +25,8 @@ import ( "time" "github.com/go-kit/kit/log" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" sd_config "github.com/prometheus/prometheus/discovery/config" @@ -949,6 +951,91 @@ scrape_configs: } } +func TestGaugeFailedConfigs(t *testing.T) { + var ( + fcGauge prometheus.Gauge + err error + ) + + cfgOneText := ` +scrape_configs: +- job_name: prometheus + consul_sd_configs: + - server: "foo:8500" + tls_config: + cert_file: "/tmp/non_existent" + - server: "bar:8500" + tls_config: + cert_file: "/tmp/non_existent" + - server: "foo2:8500" + tls_config: + cert_file: "/tmp/non_existent" +` + cfgOne := &config.Config{} + + err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne) + if err != nil { + t.Fatalf("Unable to load YAML config cfgOne: %s", err) + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager.updatert = 100 * time.Millisecond + go discoveryManager.Run() + + c := make(map[string]sd_config.ServiceDiscoveryConfig) + for _, v := range cfgOne.ScrapeConfigs { + c[v.JobName] = v.ServiceDiscoveryConfig + } + + discoveryManager.ApplyConfig(c) + <-discoveryManager.SyncCh() + + metricOne := &dto.Metric{} + fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) + if err != nil { + t.Fatal(err) + } + + fcGauge.Write(metricOne) + + failedCount := metricOne.GetGauge().GetValue() + if failedCount != 3 { + t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) + } + + cfgTwoText := ` +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ["foo:9090"] +` + cfgTwo := &config.Config{} + if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil { + t.Fatalf("Unable to load YAML config cfgTwo: %s", err) + } + c = make(map[string]sd_config.ServiceDiscoveryConfig) + for _, v := range cfgTwo.ScrapeConfigs { + c[v.JobName] = v.ServiceDiscoveryConfig + } + + discoveryManager.ApplyConfig(c) + <-discoveryManager.SyncCh() + + metricTwo := &dto.Metric{} + fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) + if err != nil { + t.Fatal(err) + } + fcGauge.Write(metricTwo) + + failedCount = metricTwo.GetGauge().GetValue() + if failedCount != 0 { + t.Fatalf("Expected to get no failed config, got: %v", failedCount) + } + +} + func TestCoordinationWithReceiver(t *testing.T) { updateDelay := 100 * time.Millisecond