From c0a9eed521ad5108d73532a0dbc61f71a633d306 Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 7 Apr 2025 16:34:40 +0200 Subject: [PATCH] feat(source): optional exclusion of unschedulable nodes (#5045) * feat(source/node): Make exclusion of unschedulable Nodes configurable This fixes a behavioral regression introduced in #4761, where nodes that were previously added to DNS are removed when they are considered unschedulable, for example due to automated maintenance tasks. This change will introduce a new flag called `exclude-unschedulable`, which defaults to `true` in order to keep in line with the current behavior. However, it would also be reasonable to restore the initial behavior before * Allow testing for expected log entries in testNodeSourceEndpoints This commit adds the required logic to be able to test for the existence (and absence) of certain log messages in testNodeSourceEndpoints. As an example, this is implemented for the tests around excludeUnschedulable. A side effect of using LogsToBuffer is that tests can't run in parallel due to the log buffer being shared across all parallel test cases. As such, these specific tests are now executed one after another. * Ensure logging is only hooked for tests that require it * Document new exclude-unschedulable flag for nodes source --- docs/flags.md | 1 + docs/sources/nodes.md | 5 +- pkg/apis/externaldns/types.go | 3 + pkg/apis/externaldns/types_test.go | 4 ++ source/node.go | 30 ++++----- source/node_test.go | 99 +++++++++++++++++++++--------- source/store.go | 4 +- 7 files changed, 99 insertions(+), 47 deletions(-) diff --git a/docs/flags.md b/docs/flags.md index 055b9a0a0..9218ba868 100644 --- a/docs/flags.md +++ b/docs/flags.md @@ -49,6 +49,7 @@ | `--[no-]traefik-disable-legacy` | Disable listeners on Resources under the traefik.containo.us API Group | | `--[no-]traefik-disable-new` | Disable listeners on Resources under the traefik.io API Group | | `--nat64-networks=NAT64-NETWORKS` | Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional) | +| `--[no-]exclude-unschedulable` | Exclude nodes that are considered unschedulable (default: true) | | `--[no-]expose-internal-ipv6` | When using the node source, expose internal IPv6 addresses (optional). Default is true. | | `--provider=provider` | The DNS provider where the DNS records will be created (required, options: akamai, alibabacloud, aws, aws-sd, azure, azure-dns, azure-private-dns, civo, cloudflare, coredns, digitalocean, dnsimple, exoscale, gandi, godaddy, google, ibmcloud, inmemory, linode, ns1, oci, ovh, pdns, pihole, plural, rfc2136, scaleway, skydns, tencentcloud, transip, ultradns, webhook) | | `--provider-cache-time=0s` | The time to cache the DNS provider record list requests. | diff --git a/docs/sources/nodes.md b/docs/sources/nodes.md index ca6830973..b3ede9609 100644 --- a/docs/sources/nodes.md +++ b/docs/sources/nodes.md @@ -7,8 +7,9 @@ The node source adds an `A` record per each node `externalIP` (if not found, any It also adds an `AAAA` record per each node IPv6 `internalIP`. Refer to the [IPv6 Behavior](#ipv6-behavior) section for more details. The TTL of the records can be set with the `external-dns.alpha.kubernetes.io/ttl` node annotation. -Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded. -This avoid exposing Unhealthy, NotReady or SchedulingDisabled (cordon) nodes. +Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded by default. +As such, no DNS records are created for Unhealthy, NotReady or SchedulingDisabled (cordon) nodes (and existing ones are removed). +In case you want to override the default, for example if you manage per-host DNS records via ExternalDNS, you can specify `--no-exclude-unschedulable` to always expose nodes no matter their status. ## IPv6 Behavior diff --git a/pkg/apis/externaldns/types.go b/pkg/apis/externaldns/types.go index d4f0601f9..36cd3a9bb 100644 --- a/pkg/apis/externaldns/types.go +++ b/pkg/apis/externaldns/types.go @@ -213,6 +213,7 @@ type Config struct { TraefikDisableLegacy bool TraefikDisableNew bool NAT64Networks []string + ExcludeUnschedulable bool } var defaultConfig = &Config{ @@ -376,6 +377,7 @@ var defaultConfig = &Config{ TraefikDisableLegacy: false, TraefikDisableNew: false, NAT64Networks: []string{}, + ExcludeUnschedulable: true, } // NewConfig returns new Config object @@ -483,6 +485,7 @@ func App(cfg *Config) *kingpin.Application { app.Flag("traefik-disable-legacy", "Disable listeners on Resources under the traefik.containo.us API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableLegacy)).BoolVar(&cfg.TraefikDisableLegacy) app.Flag("traefik-disable-new", "Disable listeners on Resources under the traefik.io API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableNew)).BoolVar(&cfg.TraefikDisableNew) app.Flag("nat64-networks", "Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional)").StringsVar(&cfg.NAT64Networks) + app.Flag("exclude-unschedulable", "Exclude nodes that are considered unschedulable (default: true)").Default(strconv.FormatBool(defaultConfig.ExcludeUnschedulable)).BoolVar(&cfg.ExcludeUnschedulable) app.Flag("expose-internal-ipv6", "When using the node source, expose internal IPv6 addresses (optional). Default is true.").BoolVar(&cfg.ExposeInternalIPV6) // Flags related to providers diff --git a/pkg/apis/externaldns/types_test.go b/pkg/apis/externaldns/types_test.go index 22973250f..b9b257c8a 100644 --- a/pkg/apis/externaldns/types_test.go +++ b/pkg/apis/externaldns/types_test.go @@ -132,6 +132,7 @@ var ( WebhookProviderURL: "http://localhost:8888", WebhookProviderReadTimeout: 5 * time.Second, WebhookProviderWriteTimeout: 10 * time.Second, + ExcludeUnschedulable: true, } overriddenConfig = &Config{ @@ -247,6 +248,7 @@ var ( WebhookProviderURL: "http://localhost:8888", WebhookProviderReadTimeout: 5 * time.Second, WebhookProviderWriteTimeout: 10 * time.Second, + ExcludeUnschedulable: false, } ) @@ -386,6 +388,7 @@ func TestParseFlags(t *testing.T) { "--managed-record-types=AAAA", "--managed-record-types=CNAME", "--managed-record-types=NS", + "--no-exclude-unschedulable", "--rfc2136-batch-change-size=100", "--rfc2136-load-balancing-strategy=round-robin", "--rfc2136-host=rfc2136-host1", @@ -505,6 +508,7 @@ func TestParseFlags(t *testing.T) { "EXTERNAL_DNS_TRANSIP_KEYFILE": "/path/to/transip.key", "EXTERNAL_DNS_DIGITALOCEAN_API_PAGE_SIZE": "100", "EXTERNAL_DNS_MANAGED_RECORD_TYPES": "A\nAAAA\nCNAME\nNS", + "EXTERNAL_DNS_EXCLUDE_UNSCHEDULABLE": "false", "EXTERNAL_DNS_RFC2136_BATCH_CHANGE_SIZE": "100", "EXTERNAL_DNS_RFC2136_LOAD_BALANCING_STRATEGY": "round-robin", "EXTERNAL_DNS_RFC2136_HOST": "rfc2136-host1\nrfc2136-host2", diff --git a/source/node.go b/source/node.go index c61c232b8..a0e426f17 100644 --- a/source/node.go +++ b/source/node.go @@ -36,16 +36,17 @@ import ( const warningMsg = "The default behavior of exposing internal IPv6 addresses will change in the next minor version. Use --no-expose-internal-ipv6 flag to opt-in to the new behavior." type nodeSource struct { - client kubernetes.Interface - annotationFilter string - fqdnTemplate *template.Template - nodeInformer coreinformers.NodeInformer - labelSelector labels.Selector - exposeInternalIPV6 bool + client kubernetes.Interface + annotationFilter string + fqdnTemplate *template.Template + nodeInformer coreinformers.NodeInformer + labelSelector labels.Selector + excludeUnschedulable bool + exposeInternalIPV6 bool } // NewNodeSource creates a new nodeSource with the given config. -func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool) (Source, error) { +func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool, excludeUnschedulable bool) (Source, error) { tmpl, err := parseTemplate(fqdnTemplate) if err != nil { return nil, err @@ -73,12 +74,13 @@ func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotat } return &nodeSource{ - client: kubeClient, - annotationFilter: annotationFilter, - fqdnTemplate: tmpl, - nodeInformer: nodeInformer, - labelSelector: labelSelector, - exposeInternalIPV6: exposeInternalIPv6, + client: kubeClient, + annotationFilter: annotationFilter, + fqdnTemplate: tmpl, + nodeInformer: nodeInformer, + labelSelector: labelSelector, + excludeUnschedulable: excludeUnschedulable, + exposeInternalIPV6: exposeInternalIPv6, }, nil } @@ -106,7 +108,7 @@ func (ns *nodeSource) Endpoints(ctx context.Context) ([]*endpoint.Endpoint, erro continue } - if node.Spec.Unschedulable { + if node.Spec.Unschedulable && ns.excludeUnschedulable { log.Debugf("Skipping node %s because it is unschedulable", node.Name) continue } diff --git a/source/node_test.go b/source/node_test.go index c944c6401..ffe64b601 100644 --- a/source/node_test.go +++ b/source/node_test.go @@ -83,6 +83,7 @@ func testNodeSourceNewNodeSource(t *testing.T) { ti.fqdnTemplate, labels.Everything(), true, + true, ) if ti.expectError { @@ -99,18 +100,21 @@ func testNodeSourceEndpoints(t *testing.T) { t.Parallel() for _, tc := range []struct { - title string - annotationFilter string - labelSelector string - fqdnTemplate string - nodeName string - nodeAddresses []v1.NodeAddress - labels map[string]string - annotations map[string]string - exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. - unschedulable bool // default to false - expected []*endpoint.Endpoint - expectError bool + title string + annotationFilter string + labelSelector string + fqdnTemplate string + nodeName string + nodeAddresses []v1.NodeAddress + labels map[string]string + annotations map[string]string + excludeUnschedulable bool // default to false + exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. + unschedulable bool // default to false + expected []*endpoint.Endpoint + expectError bool + expectedLogs []string + expectedAbsentLogs []string }{ { title: "node with short hostname returns one endpoint", @@ -363,16 +367,40 @@ func testNodeSourceEndpoints(t *testing.T) { }, }, { - title: "unschedulable node return nothing", - nodeName: "node1", - exposeInternalIPv6: true, - nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}}, - unschedulable: true, - expected: []*endpoint.Endpoint{}, + title: "unschedulable node return nothing with excludeUnschedulable=true", + nodeName: "node1", + exposeInternalIPv6: true, + nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}}, + unschedulable: true, + excludeUnschedulable: true, + expected: []*endpoint.Endpoint{}, + expectedLogs: []string{ + "Skipping node node1 because it is unschedulable", + }, + }, + { + title: "unschedulable node returns node with excludeUnschedulable=false", + nodeName: "node1", + nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}}, + unschedulable: true, + excludeUnschedulable: false, + expected: []*endpoint.Endpoint{ + {RecordType: "A", DNSName: "node1", Targets: endpoint.Targets{"1.2.3.4"}}, + }, + expectedAbsentLogs: []string{ + "Skipping node node1 because it is unschedulable", + }, }, } { tc := tc t.Run(tc.title, func(t *testing.T) { + var buf *bytes.Buffer + if len(tc.expectedLogs) == 0 && len(tc.expectedAbsentLogs) == 0 { + t.Parallel() + } else { + buf = testutils.LogsToBuffer(log.DebugLevel, t) + } + labelSelector := labels.Everything() if tc.labelSelector != "" { var err error @@ -408,6 +436,7 @@ func testNodeSourceEndpoints(t *testing.T) { tc.fqdnTemplate, labelSelector, tc.exposeInternalIPv6, + tc.excludeUnschedulable, ) require.NoError(t, err) @@ -420,24 +449,33 @@ func testNodeSourceEndpoints(t *testing.T) { // Validate returned endpoints against desired endpoints. validateEndpoints(t, endpoints, tc.expected) + + for _, entry := range tc.expectedLogs { + assert.Contains(t, buf.String(), entry) + } + + for _, entry := range tc.expectedAbsentLogs { + assert.NotContains(t, buf.String(), entry) + } }) } } func testNodeEndpointsWithIPv6(t *testing.T) { for _, tc := range []struct { - title string - annotationFilter string - labelSelector string - fqdnTemplate string - nodeName string - nodeAddresses []v1.NodeAddress - labels map[string]string - annotations map[string]string - exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. - unschedulable bool // default to false - expected []*endpoint.Endpoint - expectError bool + title string + annotationFilter string + labelSelector string + fqdnTemplate string + nodeName string + nodeAddresses []v1.NodeAddress + labels map[string]string + annotations map[string]string + excludeUnschedulable bool // defaults to false + exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. + unschedulable bool // default to false + expected []*endpoint.Endpoint + expectError bool }{ { title: "node with only internal IPs should return internal IPvs irrespective of exposeInternalIPv6", @@ -516,6 +554,7 @@ func testNodeEndpointsWithIPv6(t *testing.T) { tc.fqdnTemplate, labelSelector, tc.exposeInternalIPv6, + tc.excludeUnschedulable, ) require.NoError(t, err) diff --git a/source/store.go b/source/store.go index 6c882ec96..7970634c9 100644 --- a/source/store.go +++ b/source/store.go @@ -82,6 +82,7 @@ type Config struct { ResolveLoadBalancerHostname bool TraefikDisableLegacy bool TraefikDisableNew bool + ExcludeUnschedulable bool ExposeInternalIPv6 bool } @@ -126,6 +127,7 @@ func NewSourceConfig(cfg *externaldns.Config) *Config { ResolveLoadBalancerHostname: cfg.ResolveServiceLoadBalancerHostname, TraefikDisableLegacy: cfg.TraefikDisableLegacy, TraefikDisableNew: cfg.TraefikDisableNew, + ExcludeUnschedulable: cfg.ExcludeUnschedulable, ExposeInternalIPv6: cfg.ExposeInternalIPV6, } } @@ -264,7 +266,7 @@ func BuildWithConfig(ctx context.Context, source string, p ClientGenerator, cfg if err != nil { return nil, err } - return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6) + return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6, cfg.ExcludeUnschedulable) case "service": client, err := p.KubeClient() if err != nil {