feat(source): optional exclusion of unschedulable nodes (#5045)

* feat(source/node): Make exclusion of unschedulable Nodes configurable

This fixes a behavioral regression introduced in #4761, where
nodes that were previously added to DNS are removed when they are considered
unschedulable, for example due to automated maintenance tasks.

This change will introduce a new flag called `exclude-unschedulable`, which
defaults to `true` in order to keep in line with the current behavior.
However, it would also be reasonable to restore the initial behavior before

* Allow testing for expected log entries in testNodeSourceEndpoints

This commit adds the required logic to be able to test for
the existence (and absence) of certain log messages
in testNodeSourceEndpoints. As an example, this is implemented
for the tests around excludeUnschedulable.

A side effect of using LogsToBuffer is that tests can't run in
parallel due to the log buffer being shared across all
parallel test cases. As such, these specific tests are now executed
one after another.

* Ensure logging is only hooked for tests that require it

* Document new exclude-unschedulable flag for nodes source
This commit is contained in:
Markus 2025-04-07 16:34:40 +02:00 committed by GitHub
parent c5af75e380
commit c0a9eed521
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 99 additions and 47 deletions

View File

@ -49,6 +49,7 @@
| `--[no-]traefik-disable-legacy` | Disable listeners on Resources under the traefik.containo.us API Group | | `--[no-]traefik-disable-legacy` | Disable listeners on Resources under the traefik.containo.us API Group |
| `--[no-]traefik-disable-new` | Disable listeners on Resources under the traefik.io API Group | | `--[no-]traefik-disable-new` | Disable listeners on Resources under the traefik.io API Group |
| `--nat64-networks=NAT64-NETWORKS` | Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional) | | `--nat64-networks=NAT64-NETWORKS` | Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional) |
| `--[no-]exclude-unschedulable` | Exclude nodes that are considered unschedulable (default: true) |
| `--[no-]expose-internal-ipv6` | When using the node source, expose internal IPv6 addresses (optional). Default is true. | | `--[no-]expose-internal-ipv6` | When using the node source, expose internal IPv6 addresses (optional). Default is true. |
| `--provider=provider` | The DNS provider where the DNS records will be created (required, options: akamai, alibabacloud, aws, aws-sd, azure, azure-dns, azure-private-dns, civo, cloudflare, coredns, digitalocean, dnsimple, exoscale, gandi, godaddy, google, ibmcloud, inmemory, linode, ns1, oci, ovh, pdns, pihole, plural, rfc2136, scaleway, skydns, tencentcloud, transip, ultradns, webhook) | | `--provider=provider` | The DNS provider where the DNS records will be created (required, options: akamai, alibabacloud, aws, aws-sd, azure, azure-dns, azure-private-dns, civo, cloudflare, coredns, digitalocean, dnsimple, exoscale, gandi, godaddy, google, ibmcloud, inmemory, linode, ns1, oci, ovh, pdns, pihole, plural, rfc2136, scaleway, skydns, tencentcloud, transip, ultradns, webhook) |
| `--provider-cache-time=0s` | The time to cache the DNS provider record list requests. | | `--provider-cache-time=0s` | The time to cache the DNS provider record list requests. |

View File

@ -7,8 +7,9 @@ The node source adds an `A` record per each node `externalIP` (if not found, any
It also adds an `AAAA` record per each node IPv6 `internalIP`. Refer to the [IPv6 Behavior](#ipv6-behavior) section for more details. It also adds an `AAAA` record per each node IPv6 `internalIP`. Refer to the [IPv6 Behavior](#ipv6-behavior) section for more details.
The TTL of the records can be set with the `external-dns.alpha.kubernetes.io/ttl` node annotation. The TTL of the records can be set with the `external-dns.alpha.kubernetes.io/ttl` node annotation.
Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded. Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded by default.
This avoid exposing Unhealthy, NotReady or SchedulingDisabled (cordon) nodes. As such, no DNS records are created for Unhealthy, NotReady or SchedulingDisabled (cordon) nodes (and existing ones are removed).
In case you want to override the default, for example if you manage per-host DNS records via ExternalDNS, you can specify `--no-exclude-unschedulable` to always expose nodes no matter their status.
## IPv6 Behavior ## IPv6 Behavior

View File

@ -213,6 +213,7 @@ type Config struct {
TraefikDisableLegacy bool TraefikDisableLegacy bool
TraefikDisableNew bool TraefikDisableNew bool
NAT64Networks []string NAT64Networks []string
ExcludeUnschedulable bool
} }
var defaultConfig = &Config{ var defaultConfig = &Config{
@ -376,6 +377,7 @@ var defaultConfig = &Config{
TraefikDisableLegacy: false, TraefikDisableLegacy: false,
TraefikDisableNew: false, TraefikDisableNew: false,
NAT64Networks: []string{}, NAT64Networks: []string{},
ExcludeUnschedulable: true,
} }
// NewConfig returns new Config object // NewConfig returns new Config object
@ -483,6 +485,7 @@ func App(cfg *Config) *kingpin.Application {
app.Flag("traefik-disable-legacy", "Disable listeners on Resources under the traefik.containo.us API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableLegacy)).BoolVar(&cfg.TraefikDisableLegacy) app.Flag("traefik-disable-legacy", "Disable listeners on Resources under the traefik.containo.us API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableLegacy)).BoolVar(&cfg.TraefikDisableLegacy)
app.Flag("traefik-disable-new", "Disable listeners on Resources under the traefik.io API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableNew)).BoolVar(&cfg.TraefikDisableNew) app.Flag("traefik-disable-new", "Disable listeners on Resources under the traefik.io API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableNew)).BoolVar(&cfg.TraefikDisableNew)
app.Flag("nat64-networks", "Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional)").StringsVar(&cfg.NAT64Networks) app.Flag("nat64-networks", "Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional)").StringsVar(&cfg.NAT64Networks)
app.Flag("exclude-unschedulable", "Exclude nodes that are considered unschedulable (default: true)").Default(strconv.FormatBool(defaultConfig.ExcludeUnschedulable)).BoolVar(&cfg.ExcludeUnschedulable)
app.Flag("expose-internal-ipv6", "When using the node source, expose internal IPv6 addresses (optional). Default is true.").BoolVar(&cfg.ExposeInternalIPV6) app.Flag("expose-internal-ipv6", "When using the node source, expose internal IPv6 addresses (optional). Default is true.").BoolVar(&cfg.ExposeInternalIPV6)
// Flags related to providers // Flags related to providers

View File

@ -132,6 +132,7 @@ var (
WebhookProviderURL: "http://localhost:8888", WebhookProviderURL: "http://localhost:8888",
WebhookProviderReadTimeout: 5 * time.Second, WebhookProviderReadTimeout: 5 * time.Second,
WebhookProviderWriteTimeout: 10 * time.Second, WebhookProviderWriteTimeout: 10 * time.Second,
ExcludeUnschedulable: true,
} }
overriddenConfig = &Config{ overriddenConfig = &Config{
@ -247,6 +248,7 @@ var (
WebhookProviderURL: "http://localhost:8888", WebhookProviderURL: "http://localhost:8888",
WebhookProviderReadTimeout: 5 * time.Second, WebhookProviderReadTimeout: 5 * time.Second,
WebhookProviderWriteTimeout: 10 * time.Second, WebhookProviderWriteTimeout: 10 * time.Second,
ExcludeUnschedulable: false,
} }
) )
@ -386,6 +388,7 @@ func TestParseFlags(t *testing.T) {
"--managed-record-types=AAAA", "--managed-record-types=AAAA",
"--managed-record-types=CNAME", "--managed-record-types=CNAME",
"--managed-record-types=NS", "--managed-record-types=NS",
"--no-exclude-unschedulable",
"--rfc2136-batch-change-size=100", "--rfc2136-batch-change-size=100",
"--rfc2136-load-balancing-strategy=round-robin", "--rfc2136-load-balancing-strategy=round-robin",
"--rfc2136-host=rfc2136-host1", "--rfc2136-host=rfc2136-host1",
@ -505,6 +508,7 @@ func TestParseFlags(t *testing.T) {
"EXTERNAL_DNS_TRANSIP_KEYFILE": "/path/to/transip.key", "EXTERNAL_DNS_TRANSIP_KEYFILE": "/path/to/transip.key",
"EXTERNAL_DNS_DIGITALOCEAN_API_PAGE_SIZE": "100", "EXTERNAL_DNS_DIGITALOCEAN_API_PAGE_SIZE": "100",
"EXTERNAL_DNS_MANAGED_RECORD_TYPES": "A\nAAAA\nCNAME\nNS", "EXTERNAL_DNS_MANAGED_RECORD_TYPES": "A\nAAAA\nCNAME\nNS",
"EXTERNAL_DNS_EXCLUDE_UNSCHEDULABLE": "false",
"EXTERNAL_DNS_RFC2136_BATCH_CHANGE_SIZE": "100", "EXTERNAL_DNS_RFC2136_BATCH_CHANGE_SIZE": "100",
"EXTERNAL_DNS_RFC2136_LOAD_BALANCING_STRATEGY": "round-robin", "EXTERNAL_DNS_RFC2136_LOAD_BALANCING_STRATEGY": "round-robin",
"EXTERNAL_DNS_RFC2136_HOST": "rfc2136-host1\nrfc2136-host2", "EXTERNAL_DNS_RFC2136_HOST": "rfc2136-host1\nrfc2136-host2",

View File

@ -36,16 +36,17 @@ import (
const warningMsg = "The default behavior of exposing internal IPv6 addresses will change in the next minor version. Use --no-expose-internal-ipv6 flag to opt-in to the new behavior." const warningMsg = "The default behavior of exposing internal IPv6 addresses will change in the next minor version. Use --no-expose-internal-ipv6 flag to opt-in to the new behavior."
type nodeSource struct { type nodeSource struct {
client kubernetes.Interface client kubernetes.Interface
annotationFilter string annotationFilter string
fqdnTemplate *template.Template fqdnTemplate *template.Template
nodeInformer coreinformers.NodeInformer nodeInformer coreinformers.NodeInformer
labelSelector labels.Selector labelSelector labels.Selector
exposeInternalIPV6 bool excludeUnschedulable bool
exposeInternalIPV6 bool
} }
// NewNodeSource creates a new nodeSource with the given config. // NewNodeSource creates a new nodeSource with the given config.
func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool) (Source, error) { func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool, excludeUnschedulable bool) (Source, error) {
tmpl, err := parseTemplate(fqdnTemplate) tmpl, err := parseTemplate(fqdnTemplate)
if err != nil { if err != nil {
return nil, err return nil, err
@ -73,12 +74,13 @@ func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotat
} }
return &nodeSource{ return &nodeSource{
client: kubeClient, client: kubeClient,
annotationFilter: annotationFilter, annotationFilter: annotationFilter,
fqdnTemplate: tmpl, fqdnTemplate: tmpl,
nodeInformer: nodeInformer, nodeInformer: nodeInformer,
labelSelector: labelSelector, labelSelector: labelSelector,
exposeInternalIPV6: exposeInternalIPv6, excludeUnschedulable: excludeUnschedulable,
exposeInternalIPV6: exposeInternalIPv6,
}, nil }, nil
} }
@ -106,7 +108,7 @@ func (ns *nodeSource) Endpoints(ctx context.Context) ([]*endpoint.Endpoint, erro
continue continue
} }
if node.Spec.Unschedulable { if node.Spec.Unschedulable && ns.excludeUnschedulable {
log.Debugf("Skipping node %s because it is unschedulable", node.Name) log.Debugf("Skipping node %s because it is unschedulable", node.Name)
continue continue
} }

View File

@ -83,6 +83,7 @@ func testNodeSourceNewNodeSource(t *testing.T) {
ti.fqdnTemplate, ti.fqdnTemplate,
labels.Everything(), labels.Everything(),
true, true,
true,
) )
if ti.expectError { if ti.expectError {
@ -99,18 +100,21 @@ func testNodeSourceEndpoints(t *testing.T) {
t.Parallel() t.Parallel()
for _, tc := range []struct { for _, tc := range []struct {
title string title string
annotationFilter string annotationFilter string
labelSelector string labelSelector string
fqdnTemplate string fqdnTemplate string
nodeName string nodeName string
nodeAddresses []v1.NodeAddress nodeAddresses []v1.NodeAddress
labels map[string]string labels map[string]string
annotations map[string]string annotations map[string]string
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. excludeUnschedulable bool // default to false
unschedulable bool // default to false exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
expected []*endpoint.Endpoint unschedulable bool // default to false
expectError bool expected []*endpoint.Endpoint
expectError bool
expectedLogs []string
expectedAbsentLogs []string
}{ }{
{ {
title: "node with short hostname returns one endpoint", title: "node with short hostname returns one endpoint",
@ -363,16 +367,40 @@ func testNodeSourceEndpoints(t *testing.T) {
}, },
}, },
{ {
title: "unschedulable node return nothing", title: "unschedulable node return nothing with excludeUnschedulable=true",
nodeName: "node1", nodeName: "node1",
exposeInternalIPv6: true, exposeInternalIPv6: true,
nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}}, nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}},
unschedulable: true, unschedulable: true,
expected: []*endpoint.Endpoint{}, excludeUnschedulable: true,
expected: []*endpoint.Endpoint{},
expectedLogs: []string{
"Skipping node node1 because it is unschedulable",
},
},
{
title: "unschedulable node returns node with excludeUnschedulable=false",
nodeName: "node1",
nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}},
unschedulable: true,
excludeUnschedulable: false,
expected: []*endpoint.Endpoint{
{RecordType: "A", DNSName: "node1", Targets: endpoint.Targets{"1.2.3.4"}},
},
expectedAbsentLogs: []string{
"Skipping node node1 because it is unschedulable",
},
}, },
} { } {
tc := tc tc := tc
t.Run(tc.title, func(t *testing.T) { t.Run(tc.title, func(t *testing.T) {
var buf *bytes.Buffer
if len(tc.expectedLogs) == 0 && len(tc.expectedAbsentLogs) == 0 {
t.Parallel()
} else {
buf = testutils.LogsToBuffer(log.DebugLevel, t)
}
labelSelector := labels.Everything() labelSelector := labels.Everything()
if tc.labelSelector != "" { if tc.labelSelector != "" {
var err error var err error
@ -408,6 +436,7 @@ func testNodeSourceEndpoints(t *testing.T) {
tc.fqdnTemplate, tc.fqdnTemplate,
labelSelector, labelSelector,
tc.exposeInternalIPv6, tc.exposeInternalIPv6,
tc.excludeUnschedulable,
) )
require.NoError(t, err) require.NoError(t, err)
@ -420,24 +449,33 @@ func testNodeSourceEndpoints(t *testing.T) {
// Validate returned endpoints against desired endpoints. // Validate returned endpoints against desired endpoints.
validateEndpoints(t, endpoints, tc.expected) validateEndpoints(t, endpoints, tc.expected)
for _, entry := range tc.expectedLogs {
assert.Contains(t, buf.String(), entry)
}
for _, entry := range tc.expectedAbsentLogs {
assert.NotContains(t, buf.String(), entry)
}
}) })
} }
} }
func testNodeEndpointsWithIPv6(t *testing.T) { func testNodeEndpointsWithIPv6(t *testing.T) {
for _, tc := range []struct { for _, tc := range []struct {
title string title string
annotationFilter string annotationFilter string
labelSelector string labelSelector string
fqdnTemplate string fqdnTemplate string
nodeName string nodeName string
nodeAddresses []v1.NodeAddress nodeAddresses []v1.NodeAddress
labels map[string]string labels map[string]string
annotations map[string]string annotations map[string]string
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released. excludeUnschedulable bool // defaults to false
unschedulable bool // default to false exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
expected []*endpoint.Endpoint unschedulable bool // default to false
expectError bool expected []*endpoint.Endpoint
expectError bool
}{ }{
{ {
title: "node with only internal IPs should return internal IPvs irrespective of exposeInternalIPv6", title: "node with only internal IPs should return internal IPvs irrespective of exposeInternalIPv6",
@ -516,6 +554,7 @@ func testNodeEndpointsWithIPv6(t *testing.T) {
tc.fqdnTemplate, tc.fqdnTemplate,
labelSelector, labelSelector,
tc.exposeInternalIPv6, tc.exposeInternalIPv6,
tc.excludeUnschedulable,
) )
require.NoError(t, err) require.NoError(t, err)

View File

@ -82,6 +82,7 @@ type Config struct {
ResolveLoadBalancerHostname bool ResolveLoadBalancerHostname bool
TraefikDisableLegacy bool TraefikDisableLegacy bool
TraefikDisableNew bool TraefikDisableNew bool
ExcludeUnschedulable bool
ExposeInternalIPv6 bool ExposeInternalIPv6 bool
} }
@ -126,6 +127,7 @@ func NewSourceConfig(cfg *externaldns.Config) *Config {
ResolveLoadBalancerHostname: cfg.ResolveServiceLoadBalancerHostname, ResolveLoadBalancerHostname: cfg.ResolveServiceLoadBalancerHostname,
TraefikDisableLegacy: cfg.TraefikDisableLegacy, TraefikDisableLegacy: cfg.TraefikDisableLegacy,
TraefikDisableNew: cfg.TraefikDisableNew, TraefikDisableNew: cfg.TraefikDisableNew,
ExcludeUnschedulable: cfg.ExcludeUnschedulable,
ExposeInternalIPv6: cfg.ExposeInternalIPV6, ExposeInternalIPv6: cfg.ExposeInternalIPV6,
} }
} }
@ -264,7 +266,7 @@ func BuildWithConfig(ctx context.Context, source string, p ClientGenerator, cfg
if err != nil { if err != nil {
return nil, err return nil, err
} }
return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6) return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6, cfg.ExcludeUnschedulable)
case "service": case "service":
client, err := p.KubeClient() client, err := p.KubeClient()
if err != nil { if err != nil {