feat(source): optional exclusion of unschedulable nodes (#5045)

* feat(source/node): Make exclusion of unschedulable Nodes configurable

This fixes a behavioral regression introduced in #4761, where
nodes that were previously added to DNS are removed when they are considered
unschedulable, for example due to automated maintenance tasks.

This change will introduce a new flag called `exclude-unschedulable`, which
defaults to `true` in order to keep in line with the current behavior.
However, it would also be reasonable to restore the initial behavior before

* Allow testing for expected log entries in testNodeSourceEndpoints

This commit adds the required logic to be able to test for
the existence (and absence) of certain log messages
in testNodeSourceEndpoints. As an example, this is implemented
for the tests around excludeUnschedulable.

A side effect of using LogsToBuffer is that tests can't run in
parallel due to the log buffer being shared across all
parallel test cases. As such, these specific tests are now executed
one after another.

* Ensure logging is only hooked for tests that require it

* Document new exclude-unschedulable flag for nodes source
This commit is contained in:
Markus 2025-04-07 16:34:40 +02:00 committed by GitHub
parent c5af75e380
commit c0a9eed521
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 99 additions and 47 deletions

View File

@ -49,6 +49,7 @@
| `--[no-]traefik-disable-legacy` | Disable listeners on Resources under the traefik.containo.us API Group |
| `--[no-]traefik-disable-new` | Disable listeners on Resources under the traefik.io API Group |
| `--nat64-networks=NAT64-NETWORKS` | Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional) |
| `--[no-]exclude-unschedulable` | Exclude nodes that are considered unschedulable (default: true) |
| `--[no-]expose-internal-ipv6` | When using the node source, expose internal IPv6 addresses (optional). Default is true. |
| `--provider=provider` | The DNS provider where the DNS records will be created (required, options: akamai, alibabacloud, aws, aws-sd, azure, azure-dns, azure-private-dns, civo, cloudflare, coredns, digitalocean, dnsimple, exoscale, gandi, godaddy, google, ibmcloud, inmemory, linode, ns1, oci, ovh, pdns, pihole, plural, rfc2136, scaleway, skydns, tencentcloud, transip, ultradns, webhook) |
| `--provider-cache-time=0s` | The time to cache the DNS provider record list requests. |

View File

@ -7,8 +7,9 @@ The node source adds an `A` record per each node `externalIP` (if not found, any
It also adds an `AAAA` record per each node IPv6 `internalIP`. Refer to the [IPv6 Behavior](#ipv6-behavior) section for more details.
The TTL of the records can be set with the `external-dns.alpha.kubernetes.io/ttl` node annotation.
Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded.
This avoid exposing Unhealthy, NotReady or SchedulingDisabled (cordon) nodes.
Nodes marked as **Unschedulable** as per [core/v1/NodeSpec](https://pkg.go.dev/k8s.io/api@v0.31.1/core/v1#NodeSpec) are excluded by default.
As such, no DNS records are created for Unhealthy, NotReady or SchedulingDisabled (cordon) nodes (and existing ones are removed).
In case you want to override the default, for example if you manage per-host DNS records via ExternalDNS, you can specify `--no-exclude-unschedulable` to always expose nodes no matter their status.
## IPv6 Behavior

View File

@ -213,6 +213,7 @@ type Config struct {
TraefikDisableLegacy bool
TraefikDisableNew bool
NAT64Networks []string
ExcludeUnschedulable bool
}
var defaultConfig = &Config{
@ -376,6 +377,7 @@ var defaultConfig = &Config{
TraefikDisableLegacy: false,
TraefikDisableNew: false,
NAT64Networks: []string{},
ExcludeUnschedulable: true,
}
// NewConfig returns new Config object
@ -483,6 +485,7 @@ func App(cfg *Config) *kingpin.Application {
app.Flag("traefik-disable-legacy", "Disable listeners on Resources under the traefik.containo.us API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableLegacy)).BoolVar(&cfg.TraefikDisableLegacy)
app.Flag("traefik-disable-new", "Disable listeners on Resources under the traefik.io API Group").Default(strconv.FormatBool(defaultConfig.TraefikDisableNew)).BoolVar(&cfg.TraefikDisableNew)
app.Flag("nat64-networks", "Adding an A record for each AAAA record in NAT64-enabled networks; specify multiple times for multiple possible nets (optional)").StringsVar(&cfg.NAT64Networks)
app.Flag("exclude-unschedulable", "Exclude nodes that are considered unschedulable (default: true)").Default(strconv.FormatBool(defaultConfig.ExcludeUnschedulable)).BoolVar(&cfg.ExcludeUnschedulable)
app.Flag("expose-internal-ipv6", "When using the node source, expose internal IPv6 addresses (optional). Default is true.").BoolVar(&cfg.ExposeInternalIPV6)
// Flags related to providers

View File

@ -132,6 +132,7 @@ var (
WebhookProviderURL: "http://localhost:8888",
WebhookProviderReadTimeout: 5 * time.Second,
WebhookProviderWriteTimeout: 10 * time.Second,
ExcludeUnschedulable: true,
}
overriddenConfig = &Config{
@ -247,6 +248,7 @@ var (
WebhookProviderURL: "http://localhost:8888",
WebhookProviderReadTimeout: 5 * time.Second,
WebhookProviderWriteTimeout: 10 * time.Second,
ExcludeUnschedulable: false,
}
)
@ -386,6 +388,7 @@ func TestParseFlags(t *testing.T) {
"--managed-record-types=AAAA",
"--managed-record-types=CNAME",
"--managed-record-types=NS",
"--no-exclude-unschedulable",
"--rfc2136-batch-change-size=100",
"--rfc2136-load-balancing-strategy=round-robin",
"--rfc2136-host=rfc2136-host1",
@ -505,6 +508,7 @@ func TestParseFlags(t *testing.T) {
"EXTERNAL_DNS_TRANSIP_KEYFILE": "/path/to/transip.key",
"EXTERNAL_DNS_DIGITALOCEAN_API_PAGE_SIZE": "100",
"EXTERNAL_DNS_MANAGED_RECORD_TYPES": "A\nAAAA\nCNAME\nNS",
"EXTERNAL_DNS_EXCLUDE_UNSCHEDULABLE": "false",
"EXTERNAL_DNS_RFC2136_BATCH_CHANGE_SIZE": "100",
"EXTERNAL_DNS_RFC2136_LOAD_BALANCING_STRATEGY": "round-robin",
"EXTERNAL_DNS_RFC2136_HOST": "rfc2136-host1\nrfc2136-host2",

View File

@ -36,16 +36,17 @@ import (
const warningMsg = "The default behavior of exposing internal IPv6 addresses will change in the next minor version. Use --no-expose-internal-ipv6 flag to opt-in to the new behavior."
type nodeSource struct {
client kubernetes.Interface
annotationFilter string
fqdnTemplate *template.Template
nodeInformer coreinformers.NodeInformer
labelSelector labels.Selector
exposeInternalIPV6 bool
client kubernetes.Interface
annotationFilter string
fqdnTemplate *template.Template
nodeInformer coreinformers.NodeInformer
labelSelector labels.Selector
excludeUnschedulable bool
exposeInternalIPV6 bool
}
// NewNodeSource creates a new nodeSource with the given config.
func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool) (Source, error) {
func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotationFilter, fqdnTemplate string, labelSelector labels.Selector, exposeInternalIPv6 bool, excludeUnschedulable bool) (Source, error) {
tmpl, err := parseTemplate(fqdnTemplate)
if err != nil {
return nil, err
@ -73,12 +74,13 @@ func NewNodeSource(ctx context.Context, kubeClient kubernetes.Interface, annotat
}
return &nodeSource{
client: kubeClient,
annotationFilter: annotationFilter,
fqdnTemplate: tmpl,
nodeInformer: nodeInformer,
labelSelector: labelSelector,
exposeInternalIPV6: exposeInternalIPv6,
client: kubeClient,
annotationFilter: annotationFilter,
fqdnTemplate: tmpl,
nodeInformer: nodeInformer,
labelSelector: labelSelector,
excludeUnschedulable: excludeUnschedulable,
exposeInternalIPV6: exposeInternalIPv6,
}, nil
}
@ -106,7 +108,7 @@ func (ns *nodeSource) Endpoints(ctx context.Context) ([]*endpoint.Endpoint, erro
continue
}
if node.Spec.Unschedulable {
if node.Spec.Unschedulable && ns.excludeUnschedulable {
log.Debugf("Skipping node %s because it is unschedulable", node.Name)
continue
}

View File

@ -83,6 +83,7 @@ func testNodeSourceNewNodeSource(t *testing.T) {
ti.fqdnTemplate,
labels.Everything(),
true,
true,
)
if ti.expectError {
@ -99,18 +100,21 @@ func testNodeSourceEndpoints(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
title string
annotationFilter string
labelSelector string
fqdnTemplate string
nodeName string
nodeAddresses []v1.NodeAddress
labels map[string]string
annotations map[string]string
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
unschedulable bool // default to false
expected []*endpoint.Endpoint
expectError bool
title string
annotationFilter string
labelSelector string
fqdnTemplate string
nodeName string
nodeAddresses []v1.NodeAddress
labels map[string]string
annotations map[string]string
excludeUnschedulable bool // default to false
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
unschedulable bool // default to false
expected []*endpoint.Endpoint
expectError bool
expectedLogs []string
expectedAbsentLogs []string
}{
{
title: "node with short hostname returns one endpoint",
@ -363,16 +367,40 @@ func testNodeSourceEndpoints(t *testing.T) {
},
},
{
title: "unschedulable node return nothing",
nodeName: "node1",
exposeInternalIPv6: true,
nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}},
unschedulable: true,
expected: []*endpoint.Endpoint{},
title: "unschedulable node return nothing with excludeUnschedulable=true",
nodeName: "node1",
exposeInternalIPv6: true,
nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}},
unschedulable: true,
excludeUnschedulable: true,
expected: []*endpoint.Endpoint{},
expectedLogs: []string{
"Skipping node node1 because it is unschedulable",
},
},
{
title: "unschedulable node returns node with excludeUnschedulable=false",
nodeName: "node1",
nodeAddresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: "1.2.3.4"}},
unschedulable: true,
excludeUnschedulable: false,
expected: []*endpoint.Endpoint{
{RecordType: "A", DNSName: "node1", Targets: endpoint.Targets{"1.2.3.4"}},
},
expectedAbsentLogs: []string{
"Skipping node node1 because it is unschedulable",
},
},
} {
tc := tc
t.Run(tc.title, func(t *testing.T) {
var buf *bytes.Buffer
if len(tc.expectedLogs) == 0 && len(tc.expectedAbsentLogs) == 0 {
t.Parallel()
} else {
buf = testutils.LogsToBuffer(log.DebugLevel, t)
}
labelSelector := labels.Everything()
if tc.labelSelector != "" {
var err error
@ -408,6 +436,7 @@ func testNodeSourceEndpoints(t *testing.T) {
tc.fqdnTemplate,
labelSelector,
tc.exposeInternalIPv6,
tc.excludeUnschedulable,
)
require.NoError(t, err)
@ -420,24 +449,33 @@ func testNodeSourceEndpoints(t *testing.T) {
// Validate returned endpoints against desired endpoints.
validateEndpoints(t, endpoints, tc.expected)
for _, entry := range tc.expectedLogs {
assert.Contains(t, buf.String(), entry)
}
for _, entry := range tc.expectedAbsentLogs {
assert.NotContains(t, buf.String(), entry)
}
})
}
}
func testNodeEndpointsWithIPv6(t *testing.T) {
for _, tc := range []struct {
title string
annotationFilter string
labelSelector string
fqdnTemplate string
nodeName string
nodeAddresses []v1.NodeAddress
labels map[string]string
annotations map[string]string
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
unschedulable bool // default to false
expected []*endpoint.Endpoint
expectError bool
title string
annotationFilter string
labelSelector string
fqdnTemplate string
nodeName string
nodeAddresses []v1.NodeAddress
labels map[string]string
annotations map[string]string
excludeUnschedulable bool // defaults to false
exposeInternalIPv6 bool // default to true for this version. Change later when the next minor version is released.
unschedulable bool // default to false
expected []*endpoint.Endpoint
expectError bool
}{
{
title: "node with only internal IPs should return internal IPvs irrespective of exposeInternalIPv6",
@ -516,6 +554,7 @@ func testNodeEndpointsWithIPv6(t *testing.T) {
tc.fqdnTemplate,
labelSelector,
tc.exposeInternalIPv6,
tc.excludeUnschedulable,
)
require.NoError(t, err)

View File

@ -82,6 +82,7 @@ type Config struct {
ResolveLoadBalancerHostname bool
TraefikDisableLegacy bool
TraefikDisableNew bool
ExcludeUnschedulable bool
ExposeInternalIPv6 bool
}
@ -126,6 +127,7 @@ func NewSourceConfig(cfg *externaldns.Config) *Config {
ResolveLoadBalancerHostname: cfg.ResolveServiceLoadBalancerHostname,
TraefikDisableLegacy: cfg.TraefikDisableLegacy,
TraefikDisableNew: cfg.TraefikDisableNew,
ExcludeUnschedulable: cfg.ExcludeUnschedulable,
ExposeInternalIPv6: cfg.ExposeInternalIPV6,
}
}
@ -264,7 +266,7 @@ func BuildWithConfig(ctx context.Context, source string, p ClientGenerator, cfg
if err != nil {
return nil, err
}
return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6)
return NewNodeSource(ctx, client, cfg.AnnotationFilter, cfg.FQDNTemplate, cfg.LabelFilter, cfg.ExposeInternalIPv6, cfg.ExcludeUnschedulable)
case "service":
client, err := p.KubeClient()
if err != nil {