diff --git a/.golangci.yml b/.golangci.yml index 76de2af75..634255643 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -97,7 +97,7 @@ linters: path: istio_virtualservice.go|fqdn.go|cloudflare.go - linters: - gochecknoinits - path: ^(.*/controller\.go|internal/.*/init\.go|pkg/.*/metrics\.go|.*/webhook\.go|.*/http\.go|apis/.*\.go|.*/cached_provider\.go)$ + path: ^(internal/.*/init\.go|.*/metrics\.go|.*/webhook\.go|.*/http\.go|apis/.*\.go|.*/cached_provider\.go)$ - linters: - modernize path: ^(apis/.*\.go)$ diff --git a/controller/controller.go b/controller/controller.go index d75bafc43..4ad344f70 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -23,137 +23,16 @@ import ( "sync" "time" - "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "sigs.k8s.io/external-dns/endpoint" "sigs.k8s.io/external-dns/pkg/events" - "sigs.k8s.io/external-dns/pkg/metrics" "sigs.k8s.io/external-dns/plan" "sigs.k8s.io/external-dns/provider" "sigs.k8s.io/external-dns/registry" "sigs.k8s.io/external-dns/source" ) -var ( - registryErrorsTotal = metrics.NewCounterWithOpts( - prometheus.CounterOpts{ - Subsystem: "registry", - Name: "errors_total", - Help: "Number of Registry errors.", - }, - ) - sourceErrorsTotal = metrics.NewCounterWithOpts( - prometheus.CounterOpts{ - Subsystem: "source", - Name: "errors_total", - Help: "Number of Source errors.", - }, - ) - sourceEndpointsTotal = metrics.NewGaugeWithOpts( - prometheus.GaugeOpts{ - Subsystem: "source", - Name: "endpoints_total", - Help: "Number of Endpoints in all sources", - }, - ) - registryEndpointsTotal = metrics.NewGaugeWithOpts( - prometheus.GaugeOpts{ - Subsystem: "registry", - Name: "endpoints_total", - Help: "Number of Endpoints in the registry", - }, - ) - lastSyncTimestamp = metrics.NewGaugeWithOpts( - prometheus.GaugeOpts{ - Subsystem: "controller", - Name: "last_sync_timestamp_seconds", - Help: "Timestamp of last successful sync with the DNS provider", - }, - ) - lastReconcileTimestamp = metrics.NewGaugeWithOpts( - prometheus.GaugeOpts{ - Subsystem: "controller", - Name: "last_reconcile_timestamp_seconds", - Help: "Timestamp of last attempted sync with the DNS provider", - }, - ) - controllerNoChangesTotal = metrics.NewCounterWithOpts( - prometheus.CounterOpts{ - Subsystem: "controller", - Name: "no_op_runs_total", - Help: "Number of reconcile loops ending up with no changes on the DNS provider side.", - }, - ) - deprecatedRegistryErrors = metrics.NewCounterWithOpts( - prometheus.CounterOpts{ - Subsystem: "registry", - Name: "errors_total", - Help: "Number of Registry errors.", - }, - ) - deprecatedSourceErrors = metrics.NewCounterWithOpts( - prometheus.CounterOpts{ - Subsystem: "source", - Name: "errors_total", - Help: "Number of Source errors.", - }, - ) - - registryRecords = metrics.NewGaugedVectorOpts( - prometheus.GaugeOpts{ - Subsystem: "registry", - Name: "records", - Help: "Number of registry records partitioned by label name (vector).", - }, - []string{"record_type"}, - ) - - sourceRecords = metrics.NewGaugedVectorOpts( - prometheus.GaugeOpts{ - Subsystem: "source", - Name: "records", - Help: "Number of source records partitioned by label name (vector).", - }, - []string{"record_type"}, - ) - - verifiedRecords = metrics.NewGaugedVectorOpts( - prometheus.GaugeOpts{ - Subsystem: "controller", - Name: "verified_records", - Help: "Number of DNS records that exists both in source and registry (vector).", - }, - []string{"record_type"}, - ) - - consecutiveSoftErrors = metrics.NewGaugeWithOpts( - prometheus.GaugeOpts{ - Subsystem: "controller", - Name: "consecutive_soft_errors", - Help: "Number of consecutive soft errors in reconciliation loop.", - }, - ) -) - -func init() { - metrics.RegisterMetric.MustRegister(registryErrorsTotal) - metrics.RegisterMetric.MustRegister(sourceErrorsTotal) - metrics.RegisterMetric.MustRegister(sourceEndpointsTotal) - metrics.RegisterMetric.MustRegister(registryEndpointsTotal) - metrics.RegisterMetric.MustRegister(lastSyncTimestamp) - metrics.RegisterMetric.MustRegister(lastReconcileTimestamp) - metrics.RegisterMetric.MustRegister(deprecatedRegistryErrors) - metrics.RegisterMetric.MustRegister(deprecatedSourceErrors) - metrics.RegisterMetric.MustRegister(controllerNoChangesTotal) - - metrics.RegisterMetric.MustRegister(registryRecords) - metrics.RegisterMetric.MustRegister(sourceRecords) - metrics.RegisterMetric.MustRegister(verifiedRecords) - - metrics.RegisterMetric.MustRegister(consecutiveSoftErrors) -} - // Controller is responsible for orchestrating the different components. // It works in the following way: // * Ask the DNS provider for the current list of endpoints. @@ -194,8 +73,6 @@ func (c *Controller) RunOnce(ctx context.Context) error { c.lastRunAt = time.Now() c.runAtMutex.Unlock() - regMetrics := newMetricsRecorder() - regRecords, err := c.Registry.Records(ctx) if err != nil { registryErrorsTotal.Counter.Inc() @@ -205,7 +82,7 @@ func (c *Controller) RunOnce(ctx context.Context) error { registryEndpointsTotal.Gauge.Set(float64(len(regRecords))) - countAddressRecords(regMetrics, regRecords, registryRecords) + countAddressRecords(regRecords, registryRecords) ctx = context.WithValue(ctx, provider.RecordsContextKey, regRecords) @@ -218,11 +95,8 @@ func (c *Controller) RunOnce(ctx context.Context) error { sourceEndpointsTotal.Gauge.Set(float64(len(sourceEndpoints))) - sourceMetrics := newMetricsRecorder() - countAddressRecords(sourceMetrics, sourceEndpoints, sourceRecords) - - vaMetrics := newMetricsRecorder() - countMatchingAddressRecords(vaMetrics, sourceEndpoints, regRecords, verifiedRecords) + countAddressRecords(sourceEndpoints, sourceRecords) + countMatchingAddressRecords(sourceEndpoints, regRecords, verifiedRecords) endpoints, err := c.Registry.AdjustEndpoints(sourceEndpoints) if err != nil { @@ -280,43 +154,6 @@ func latest(r time.Time, times ...time.Time) time.Time { return r } -// Counts the intersections of records in endpoint and registry. -func countMatchingAddressRecords(rec *metricsRecorder, endpoints []*endpoint.Endpoint, registryRecords []*endpoint.Endpoint, metric metrics.GaugeVecMetric) { - recordsMap := make(map[string]map[string]struct{}) - for _, regRecord := range registryRecords { - if _, found := recordsMap[regRecord.DNSName]; !found { - recordsMap[regRecord.DNSName] = make(map[string]struct{}) - } - recordsMap[regRecord.DNSName][regRecord.RecordType] = struct{}{} - } - - for _, sourceRecord := range endpoints { - if _, found := recordsMap[sourceRecord.DNSName]; found { - if _, ok := recordsMap[sourceRecord.DNSName][sourceRecord.RecordType]; ok { - rec.recordEndpointType(sourceRecord.RecordType) - } - } - } - - for _, rt := range endpoint.KnownRecordTypes { - metric.SetWithLabels(rec.loadFloat64(rt), rt) - } -} - -// countAddressRecords updates the metricsRecorder with the count of each record type -// found in the provided endpoints slice, and sets the corresponding metrics for each -// known DNS record type using the sourceRecords metric. -func countAddressRecords(rec *metricsRecorder, endpoints []*endpoint.Endpoint, metric metrics.GaugeVecMetric) { - // compute the number of records per type - for _, endPoint := range endpoints { - rec.recordEndpointType(endPoint.RecordType) - } - // set metrics for each record type - for _, rt := range endpoint.KnownRecordTypes { - metric.SetWithLabels(rec.loadFloat64(rt), rt) - } -} - // ScheduleRunOnce makes sure execution happens at most once per interval. func (c *Controller) ScheduleRunOnce(now time.Time) { c.runAtMutex.Lock() diff --git a/controller/metrics.go b/controller/metrics.go index 940addb29..ad0b4ee0b 100644 --- a/controller/metrics.go +++ b/controller/metrics.go @@ -16,39 +16,168 @@ limitations under the License. package controller -import "sigs.k8s.io/external-dns/endpoint" +import ( + "github.com/prometheus/client_golang/prometheus" -type metricsRecorder struct { - counterPerEndpointType map[string]int -} + "sigs.k8s.io/external-dns/endpoint" + "sigs.k8s.io/external-dns/pkg/metrics" +) -func newMetricsRecorder() *metricsRecorder { - return &metricsRecorder{ - counterPerEndpointType: map[string]int{ - endpoint.RecordTypeA: 0, - endpoint.RecordTypeAAAA: 0, - endpoint.RecordTypeCNAME: 0, - endpoint.RecordTypeTXT: 0, - endpoint.RecordTypeSRV: 0, - endpoint.RecordTypeNS: 0, - endpoint.RecordTypePTR: 0, - endpoint.RecordTypeMX: 0, - endpoint.RecordTypeNAPTR: 0, +var ( + registryErrorsTotal = metrics.NewCounterWithOpts( + prometheus.CounterOpts{ + Subsystem: "registry", + Name: "errors_total", + Help: "Number of Registry errors.", }, + ) + sourceErrorsTotal = metrics.NewCounterWithOpts( + prometheus.CounterOpts{ + Subsystem: "source", + Name: "errors_total", + Help: "Number of Source errors.", + }, + ) + sourceEndpointsTotal = metrics.NewGaugeWithOpts( + prometheus.GaugeOpts{ + Subsystem: "source", + Name: "endpoints_total", + Help: "Number of Endpoints in all sources", + }, + ) + registryEndpointsTotal = metrics.NewGaugeWithOpts( + prometheus.GaugeOpts{ + Subsystem: "registry", + Name: "endpoints_total", + Help: "Number of Endpoints in the registry", + }, + ) + lastSyncTimestamp = metrics.NewGaugeWithOpts( + prometheus.GaugeOpts{ + Subsystem: "controller", + Name: "last_sync_timestamp_seconds", + Help: "Timestamp of last successful sync with the DNS provider", + }, + ) + lastReconcileTimestamp = metrics.NewGaugeWithOpts( + prometheus.GaugeOpts{ + Subsystem: "controller", + Name: "last_reconcile_timestamp_seconds", + Help: "Timestamp of last attempted sync with the DNS provider", + }, + ) + controllerNoChangesTotal = metrics.NewCounterWithOpts( + prometheus.CounterOpts{ + Subsystem: "controller", + Name: "no_op_runs_total", + Help: "Number of reconcile loops ending up with no changes on the DNS provider side.", + }, + ) + deprecatedRegistryErrors = metrics.NewCounterWithOpts( + prometheus.CounterOpts{ + Subsystem: "registry", + Name: "errors_total", + Help: "Number of Registry errors.", + }, + ) + deprecatedSourceErrors = metrics.NewCounterWithOpts( + prometheus.CounterOpts{ + Subsystem: "source", + Name: "errors_total", + Help: "Number of Source errors.", + }, + ) + + registryRecords = metrics.NewGaugedVectorOpts( + prometheus.GaugeOpts{ + Subsystem: "registry", + Name: "records", + Help: "Number of registry records partitioned by label name (vector).", + }, + []string{"record_type"}, + ) + + sourceRecords = metrics.NewGaugedVectorOpts( + prometheus.GaugeOpts{ + Subsystem: "source", + Name: "records", + Help: "Number of source records partitioned by label name (vector).", + }, + []string{"record_type"}, + ) + + verifiedRecords = metrics.NewGaugedVectorOpts( + prometheus.GaugeOpts{ + Subsystem: "controller", + Name: "verified_records", + Help: "Number of DNS records that exists both in source and registry (vector).", + }, + []string{"record_type"}, + ) + + consecutiveSoftErrors = metrics.NewGaugeWithOpts( + prometheus.GaugeOpts{ + Subsystem: "controller", + Name: "consecutive_soft_errors", + Help: "Number of consecutive soft errors in reconciliation loop.", + }, + ) +) + +func init() { + metrics.RegisterMetric.MustRegister(registryErrorsTotal) + metrics.RegisterMetric.MustRegister(sourceErrorsTotal) + metrics.RegisterMetric.MustRegister(sourceEndpointsTotal) + metrics.RegisterMetric.MustRegister(registryEndpointsTotal) + metrics.RegisterMetric.MustRegister(lastSyncTimestamp) + metrics.RegisterMetric.MustRegister(lastReconcileTimestamp) + metrics.RegisterMetric.MustRegister(deprecatedRegistryErrors) + metrics.RegisterMetric.MustRegister(deprecatedSourceErrors) + metrics.RegisterMetric.MustRegister(controllerNoChangesTotal) + + metrics.RegisterMetric.MustRegister(registryRecords) + metrics.RegisterMetric.MustRegister(sourceRecords) + metrics.RegisterMetric.MustRegister(verifiedRecords) + + metrics.RegisterMetric.MustRegister(consecutiveSoftErrors) +} + +type dnsKey struct { + name string + recordType string +} + +// countMatchingAddressRecords counts records that exist in both endpoints and registry. +func countMatchingAddressRecords(endpoints []*endpoint.Endpoint, registryRecords []*endpoint.Endpoint, metric metrics.GaugeVecMetric) { + metric.Gauge.Reset() + + registry := make(map[dnsKey]struct{}, len(registryRecords)) + for _, r := range registryRecords { + registry[dnsKey{r.DNSName, r.RecordType}] = struct{}{} + } + + counts := make(map[string]float64) + for _, ep := range endpoints { + if _, found := registry[dnsKey{ep.DNSName, ep.RecordType}]; found { + counts[ep.RecordType]++ + } + } + + for recordType, count := range counts { + metric.AddWithLabels(count, recordType) } } -func (m *metricsRecorder) recordEndpointType(endpointType string) { - m.counterPerEndpointType[endpointType]++ -} +// countAddressRecords counts each record type in the provided endpoints slice. +func countAddressRecords(endpoints []*endpoint.Endpoint, metric metrics.GaugeVecMetric) { + metric.Gauge.Reset() -func (m *metricsRecorder) getEndpointTypeCount(endpointType string) int { - if count, ok := m.counterPerEndpointType[endpointType]; ok { - return count + counts := make(map[string]float64) + for _, ep := range endpoints { + counts[ep.RecordType]++ } - return 0 -} -func (m *metricsRecorder) loadFloat64(endpointType string) float64 { - return float64(m.getEndpointTypeCount(endpointType)) + for recordType, count := range counts { + metric.AddWithLabels(count, recordType) + } } diff --git a/controller/metrics_test.go b/controller/metrics_test.go index 8f4c39906..9e27ae148 100644 --- a/controller/metrics_test.go +++ b/controller/metrics_test.go @@ -20,7 +20,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "sigs.k8s.io/external-dns/endpoint" "sigs.k8s.io/external-dns/internal/testutils" "sigs.k8s.io/external-dns/pkg/apis/externaldns" @@ -28,41 +27,6 @@ import ( "sigs.k8s.io/external-dns/registry" ) -func TestRecordKnownEndpointType(t *testing.T) { - mr := newMetricsRecorder() - - // Recording a built-in type should start at 1 and increment - mr.recordEndpointType(endpoint.RecordTypeA) - assert.Equal(t, 1, mr.getEndpointTypeCount(endpoint.RecordTypeA)) - - mr.recordEndpointType(endpoint.RecordTypeA) - assert.Equal(t, 2, mr.getEndpointTypeCount(endpoint.RecordTypeA)) -} - -func TestRecordUnknownEndpointType(t *testing.T) { - mr := newMetricsRecorder() - const customType = "CUSTOM" - - // Unknown types start at zero - assert.Equal(t, 0, mr.getEndpointTypeCount(customType)) - - // First record sets to 1 - mr.recordEndpointType(customType) - assert.Equal(t, 1, mr.getEndpointTypeCount(customType)) - - // Subsequent records increment - mr.recordEndpointType(customType) - assert.Equal(t, 2, mr.getEndpointTypeCount(customType)) -} - -func TestLoadFloat64(t *testing.T) { - mr := newMetricsRecorder() - - // loadFloat64 should return the float64 representation of the count - mr.recordEndpointType(endpoint.RecordTypeAAAA) - assert.InDelta(t, float64(1), mr.loadFloat64(endpoint.RecordTypeAAAA), 0.0001) -} - func TestVerifyARecords(t *testing.T) { testControllerFiltersDomains( t, @@ -322,6 +286,26 @@ func TestAAAARecords(t *testing.T) { } func TestGaugeMetricsWithMixedRecords(t *testing.T) { + ctrl := newMixedRecordsFixture() + + assert.NoError(t, ctrl.RunOnce(t.Context())) + + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 534, sourceRecords.Gauge, map[string]string{"record_type": "a"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 324, sourceRecords.Gauge, map[string]string{"record_type": "aaaa"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 2, sourceRecords.Gauge, map[string]string{"record_type": "cname"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 11, sourceRecords.Gauge, map[string]string{"record_type": "srv"}) + + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 5334, registryRecords.Gauge, map[string]string{"record_type": "a"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 324, registryRecords.Gauge, map[string]string{"record_type": "aaaa"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 0, registryRecords.Gauge, map[string]string{"record_type": "mx"}) + testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 43, registryRecords.Gauge, map[string]string{"record_type": "ptr"}) +} + +type mixedRecordsFixture struct { + ctrl *Controller +} + +func newMixedRecordsFixture() *Controller { configuredEndpoints := testutils.GenerateTestEndpointsByType(map[string]int{ endpoint.RecordTypeA: 534, endpoint.RecordTypeAAAA: 324, @@ -351,27 +335,23 @@ func TestGaugeMetricsWithMixedRecords(t *testing.T) { provider := &filteredMockProvider{ RecordsStore: providerEndpoints, } - r, err := registry.SelectRegistry(cfg, provider) + r, _ := registry.SelectRegistry(cfg, provider) - require.NoError(t, err) - - ctrl := &Controller{ + return &Controller{ Source: source, Registry: r, Policy: &plan.SyncPolicy{}, DomainFilter: endpoint.NewDomainFilter([]string{}), ManagedRecordTypes: cfg.ManagedDNSRecordTypes, } - - assert.NoError(t, ctrl.RunOnce(t.Context())) - - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 534, sourceRecords.Gauge, map[string]string{"record_type": "a"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 324, sourceRecords.Gauge, map[string]string{"record_type": "aaaa"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 2, sourceRecords.Gauge, map[string]string{"record_type": "cname"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 11, sourceRecords.Gauge, map[string]string{"record_type": "srv"}) - - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 5334, registryRecords.Gauge, map[string]string{"record_type": "a"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 324, registryRecords.Gauge, map[string]string{"record_type": "aaaa"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 0, registryRecords.Gauge, map[string]string{"record_type": "mx"}) - testutils.TestHelperVerifyMetricsGaugeVectorWithLabels(t, 43, registryRecords.Gauge, map[string]string{"record_type": "ptr"}) +} + +func BenchmarkGaugeMetricsWithMixedRecords(b *testing.B) { + ctrl := newMixedRecordsFixture() + + for b.Loop() { + if err := ctrl.RunOnce(b.Context()); err != nil { + b.Fatal(err) + } + } } diff --git a/pkg/metrics/models.go b/pkg/metrics/models.go index 9998fac16..b3df676c1 100644 --- a/pkg/metrics/models.go +++ b/pkg/metrics/models.go @@ -81,10 +81,16 @@ func (g GaugeVecMetric) Get() *Metric { // SetWithLabels sets the value of the Gauge metric for the specified label values. // All label values are converted to lowercase before being applied. func (g GaugeVecMetric) SetWithLabels(value float64, lvs ...string) { - for i, v := range lvs { - lvs[i] = strings.ToLower(v) - } - g.Gauge.WithLabelValues(lvs...).Set(value) + g.Gauge.WithLabelValues(toLower(lvs)...).Set(value) +} + +// AddWithLabels adds the value to the Gauge metric for the specified label values. +// All label values are converted to lowercase before being applied. +// +// Without Reset(), values accumulate and reset only on process restart. +// Use Reset() + AddWithLabels() pattern for per-cycle counts. +func (g GaugeVecMetric) AddWithLabels(value float64, lvs ...string) { + g.Gauge.WithLabelValues(toLower(lvs)...).Add(value) } func NewGaugeWithOpts(opts prometheus.GaugeOpts) GaugeMetric { @@ -209,3 +215,13 @@ func PathProcessor(path string) string { parts := strings.Split(path, "/") return parts[len(parts)-1] } + +// toLower converts all label values to lowercase. +// The Prometheus maintainers have intentionally avoided magic transformations to keep label handling explicit and predictable. +// We expect consistent casing, normalizing at ingestion is the standard practice. +func toLower(lvs []string) []string { + for i := range lvs { + lvs[i] = strings.ToLower(lvs[i]) + } + return lvs +} diff --git a/pkg/metrics/models_test.go b/pkg/metrics/models_test.go index 1dd7c6118..4d840bdd3 100644 --- a/pkg/metrics/models_test.go +++ b/pkg/metrics/models_test.go @@ -241,3 +241,39 @@ func TestPathProcessor(t *testing.T) { }) } } + +func TestGaugeV_AddWithLabels(t *testing.T) { + opts := prometheus.GaugeOpts{ + Name: "test_gauge_add", + Namespace: "test_ns", + Subsystem: "test_sub", + Help: "help text", + } + gv := NewGaugedVectorOpts(opts, []string{"label1", "label2"}) + + // Add with mixed case labels - should be lowercased + gv.AddWithLabels(1.0, "Alpha", "BETA") + + g, err := gv.Gauge.GetMetricWithLabelValues("alpha", "beta") + assert.NoError(t, err) + + var m dto.Metric + err = g.Write(&m) + assert.NoError(t, err) + assert.NotNil(t, m.Gauge) + assert.InDelta(t, 1.0, *m.Gauge.Value, 0.01) + + // Add again - should increment, not override + gv.AddWithLabels(2.0, "ALPHA", "beta") + err = g.Write(&m) + assert.NoError(t, err) + assert.InDelta(t, 3.0, *m.Gauge.Value, 0.01) // 1.0 + 2.0 = 3.0 + + // Add one more time + gv.AddWithLabels(0.5, "alpha", "Beta") + err = g.Write(&m) + assert.NoError(t, err) + assert.InDelta(t, 3.5, *m.Gauge.Value, 0.01) // 3.0 + 0.5 = 3.5 + + assert.Len(t, m.Label, 2) +}