From 2007a49f1f3e75015803fa30f5297b96f9d35bc5 Mon Sep 17 00:00:00 2001 From: Thibault Jamet Date: Fri, 10 Feb 2023 21:52:25 +0100 Subject: [PATCH 1/2] Improve MinEventInterval compliance with docs **Description** In the command line arguments, we see `min-event-sync-interval` as "The minimum interval between two consecutive synchronizations triggered from kubernetes events" In the code, it actually acts a different way. It imposes a certain dealy between syncs. While this is compliant with the "minimum delay between 2 consecutive synchronizations", it has side-effects in case of large delays. In particular, when trying to fine-tune external-dns to match the provider rate-limits. In this case, it may be interesting to restrict the rate of reconciling actions happening by having a high `min-event-sync-interval`, while keeping a low latency for initial events. This would allow to maximise the bulk effect of high change rate while keeping fast enough reaction for isolated changes. **Checklist** - [X] Unit tests updated - [X] End user documentation updated > End user documentation matches the updated behaviour with more > accuracy Change-Id: Ibcea707974a095a2d5861a3974b4c79e5a15b00e --- controller/controller.go | 48 +++++++++++++++++++++++++++-------- controller/controller_test.go | 9 ++++++- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index a3ef2e8b2..c8bb6ff18 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -189,8 +189,10 @@ type Controller struct { DomainFilter endpoint.DomainFilter // The nextRunAt used for throttling and batching reconciliation nextRunAt time.Time - // The nextRunAtMux is for atomic updating of nextRunAt - nextRunAtMux sync.Mutex + // The runAtMutex is for atomic updating of nextRunAt and lastRunAt + runAtMutex sync.Mutex + // The lastRunAt used for throttling and batching reconciliation + lastRunAt time.Time // MangedRecordTypes are DNS record types that will be considered for management. ManagedRecordTypes []string // ExcludeRecordTypes are DNS record types that will be excluded from management. @@ -203,6 +205,10 @@ type Controller struct { func (c *Controller) RunOnce(ctx context.Context) error { lastReconcileTimestamp.SetToCurrentTime() + c.runAtMutex.Lock() + c.lastRunAt = time.Now() + c.runAtMutex.Unlock() + records, err := c.Registry.Records(ctx) if err != nil { registryErrorsTotal.Inc() @@ -264,6 +270,24 @@ func (c *Controller) RunOnce(ctx context.Context) error { return nil } +func earliest(r time.Time, times ...time.Time) time.Time { + for _, t := range times { + if t.Before(r) { + r = t + } + } + return r +} + +func latest(r time.Time, times ...time.Time) time.Time { + for _, t := range times { + if t.After(r) { + r = t + } + } + return r +} + // Counts the intersections of A and AAAA records in endpoint and registry. func countMatchingAddressRecords(endpoints []*endpoint.Endpoint, registryRecords []*endpoint.Endpoint) (int, int) { recordsMap := make(map[string]map[string]struct{}) @@ -306,18 +330,20 @@ func countAddressRecords(endpoints []*endpoint.Endpoint) (int, int) { // ScheduleRunOnce makes sure execution happens at most once per interval. func (c *Controller) ScheduleRunOnce(now time.Time) { - c.nextRunAtMux.Lock() - defer c.nextRunAtMux.Unlock() - // schedule only if a reconciliation is not already planned - // to happen in the following c.MinEventSyncInterval - if !c.nextRunAt.Before(now.Add(c.MinEventSyncInterval)) { - c.nextRunAt = now.Add(c.MinEventSyncInterval) - } + c.runAtMutex.Lock() + defer c.runAtMutex.Unlock() + c.nextRunAt = latest( + c.lastRunAt.Add(c.MinEventSyncInterval), + earliest( + now.Add(5*time.Second), + c.nextRunAt, + ), + ) } func (c *Controller) ShouldRunOnce(now time.Time) bool { - c.nextRunAtMux.Lock() - defer c.nextRunAtMux.Unlock() + c.runAtMutex.Lock() + defer c.runAtMutex.Unlock() if now.Before(c.nextRunAt) { return false } diff --git a/controller/controller_test.go b/controller/controller_test.go index 7fa83f501..e95aa9802 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -278,15 +278,17 @@ func valueFromMetric(metric prometheus.Gauge) uint64 { } func TestShouldRunOnce(t *testing.T) { - ctrl := &Controller{Interval: 10 * time.Minute, MinEventSyncInterval: 5 * time.Second} + ctrl := &Controller{Interval: 10 * time.Minute, MinEventSyncInterval: 15 * time.Second} now := time.Now() // First run of Run loop should execute RunOnce assert.True(t, ctrl.ShouldRunOnce(now)) + assert.Equal(t, now.Add(10*time.Minute), ctrl.nextRunAt) // Second run should not assert.False(t, ctrl.ShouldRunOnce(now)) + ctrl.lastRunAt = now now = now.Add(10 * time.Second) // Changes happen in ingresses or services @@ -316,12 +318,17 @@ func TestShouldRunOnce(t *testing.T) { assert.False(t, ctrl.ShouldRunOnce(now)) // Multiple ingresses or services changes, closer than MinInterval from each other + ctrl.lastRunAt = now firstChangeTime := now secondChangeTime := firstChangeTime.Add(time.Second) // First change ctrl.ScheduleRunOnce(firstChangeTime) // Second change ctrl.ScheduleRunOnce(secondChangeTime) + + // Executions should be spaced by at least MinEventSyncInterval + assert.False(t, ctrl.ShouldRunOnce(now.Add(5*time.Second))) + // Should not postpone the reconciliation further than firstChangeTime + MinInterval now = now.Add(ctrl.MinEventSyncInterval) assert.True(t, ctrl.ShouldRunOnce(now)) From 1b5ed44e4841ae591fe1dd174916217d07c93058 Mon Sep 17 00:00:00 2001 From: Thibault Jamet Date: Mon, 13 Feb 2023 17:59:25 +0100 Subject: [PATCH 2/2] Improve documentation --- docs/tutorials/aws.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/aws.md b/docs/tutorials/aws.md index d4ac65741..6bc55dd0f 100644 --- a/docs/tutorials/aws.md +++ b/docs/tutorials/aws.md @@ -971,8 +971,9 @@ Route53 has a [5 API requests per second per account hard quota](https://docs.aw Running several fast polling ExternalDNS instances in a given account can easily hit that limit. Some ways to reduce the request rate include: * Reduce the polling loop's synchronization interval at the possible cost of slower change propagation (but see `--events` below to reduce the impact). * `--interval=5m` (default `1m`) -* Trigger the polling loop on changes to K8s objects, rather than only at `interval`, to have responsive updates with long poll intervals +* Trigger the polling loop on changes to K8s objects, rather than only at `interval` and ensure a minimum of time between events, to have responsive updates with long poll intervals * `--events` + * `--min-event-sync-interval=5m` (default `5s`) * Limit the [sources watched](https://github.com/kubernetes-sigs/external-dns/blob/master/pkg/apis/externaldns/types.go#L364) when the `--events` flag is specified to specific types, namespaces, labels, or annotations * `--source=ingress --source=service` - specify multiple times for multiple sources * `--namespace=my-app`