From 781eb5c8efa5f24c5f90bfee300840dd071cf235 Mon Sep 17 00:00:00 2001 From: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Date: Fri, 2 Aug 2024 08:28:23 -0500 Subject: [PATCH] Added init work to support RFC2136 multiple hosts. Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Fixed go tests, added checks to ensure multiple hosts, and RFC2136LoadBalancingStrategy is set and can be overridden Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Documentation to support Multiple Hosts and Load Balancing features Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> WIP, counter not working correctly Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Make pointers to the rfc2136 provider, fixed counter issue, log out last error. Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Fix error with failover not working correctly Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Repoint makefile us.gcr.io/k8s-artifacts-prod/external-dns Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Repoint makefile us.gcr.io/k8s-artifacts-prod/external-dns Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Fix changes that aren't related directly to this PR Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> Changed comment message details for counter Signed-off-by: Jeremy-Boyle <9406398+Jeremy-Boyle@users.noreply.github.com> --- docs/tutorials/rfc2136.md | 46 ++++- main.go | 3 +- pkg/apis/externaldns/types.go | 6 +- pkg/apis/externaldns/types_test.go | 17 +- provider/rfc2136/rfc2136.go | 281 ++++++++++++++++++++--------- provider/rfc2136/rfc2136_test.go | 181 +++++++++++++++++-- 6 files changed, 430 insertions(+), 104 deletions(-) diff --git a/docs/tutorials/rfc2136.md b/docs/tutorials/rfc2136.md index 09e7a72e7..0b0e15f84 100644 ---
a/docs/tutorials/rfc2136.md +++ b/docs/tutorials/rfc2136.md @@ -379,7 +379,7 @@ You'll want to configure `external-dns` similarly to the following: ```text ... - --provider=rfc2136 - - --rfc2136-gss-tsig + - --rfc2136-gss-tsig - --rfc2136-host=dns-host.yourdomain.com - --rfc2136-port=53 - --rfc2136-zone=your-zone.com @@ -435,3 +435,47 @@ If your DNS server does zone transfers over TLS, you can instruct `external-dns` * `--rfc2136-skip-tls-verify` Disables verification of the certificate supplied by the DNS server. It is currently not supported to do only zone transfers over TLS, but not the updates. They are enabled and disabled together. + +## Configuring RFC2136 Provider with Multiple Hosts and Load Balancing + +This section describes how to configure the RFC2136 provider in ExternalDNS to support multiple DNS servers and load balancing options. + +### Enhancements Overview + +The RFC2136 provider now supports multiple DNS hosts and introduces load balancing options to distribute DNS update requests evenly across available DNS servers. This helps prevent a single server from becoming a bottleneck in environments with multiple DNS servers. + +### Configuration Steps + +1. **Specify Multiple Hosts with `--rfc2136-host`** + - Repeat the `--rfc2136-host` command-line option once for each DNS server. + - Example: `--rfc2136-host="dns-host-1.yourdomain.com" --rfc2136-host="dns-host-2.yourdomain.com"` + +2. **Choose a Load Balancing Strategy** + - Set the `--rfc2136-load-balancing-strategy` command-line option to select the load balancing strategy. + - Supported options: + - `round-robin`: Distributes DNS updates evenly across all specified hosts in a round-robin manner. + - `random`: Randomly selects a host for each DNS update. + - `disabled` (default): Uses the first host in the list as the primary, only moving to the next host if a failure occurs.
+ +### Example Configuration + +```shell +external-dns \ + --provider=rfc2136 \ + --rfc2136-host="dns-host-1.yourdomain.com" \ + --rfc2136-host="dns-host-2.yourdomain.com" \ + --rfc2136-host="dns-host-3.yourdomain.com" \ + --rfc2136-load-balancing-strategy="round-robin" \ + --rfc2136-port=53 \ + --rfc2136-zone=example.com \ + --rfc2136-tsig-secret-alg=hmac-sha256 \ + --rfc2136-tsig-keyname=example-key \ + --rfc2136-tsig-secret=example-secret \ + --rfc2136-insecure +``` + +### Benefits + +- Distributes the load of DNS updates across multiple data centers, preventing any single DC from becoming a bottleneck. +- Provides flexibility to choose different load balancing strategies based on the environment and requirements. +- Improves the resilience and reliability of DNS updates by introducing a retry mechanism with a list of hosts. \ No newline at end of file diff --git a/main.go b/main.go index d58006391..848f87522 100644 --- a/main.go +++ b/main.go @@ -317,9 +317,8 @@ func main() { CAFilePath: cfg.TLSCA, ClientCertFilePath: cfg.TLSClientCert, ClientCertKeyFilePath: cfg.TLSClientCertKey, - ServerName: "", } - p, err = rfc2136.NewRfc2136Provider(cfg.RFC2136Host, cfg.RFC2136Port, cfg.RFC2136Zone, cfg.RFC2136Insecure, cfg.RFC2136TSIGKeyName, cfg.RFC2136TSIGSecret, cfg.RFC2136TSIGSecretAlg, cfg.RFC2136TAXFR, domainFilter, cfg.DryRun, cfg.RFC2136MinTTL, cfg.RFC2136CreatePTR, cfg.RFC2136GSSTSIG, cfg.RFC2136KerberosUsername, cfg.RFC2136KerberosPassword, cfg.RFC2136KerberosRealm, cfg.RFC2136BatchChangeSize, tlsConfig, nil) + p, err = rfc2136.NewRfc2136Provider(cfg.RFC2136Host, cfg.RFC2136Port, cfg.RFC2136Zone, cfg.RFC2136Insecure, cfg.RFC2136TSIGKeyName, cfg.RFC2136TSIGSecret, cfg.RFC2136TSIGSecretAlg, cfg.RFC2136TAXFR, domainFilter, cfg.DryRun, cfg.RFC2136MinTTL, cfg.RFC2136CreatePTR, cfg.RFC2136GSSTSIG, cfg.RFC2136KerberosUsername, cfg.RFC2136KerberosPassword, cfg.RFC2136KerberosRealm, cfg.RFC2136BatchChangeSize, tlsConfig, cfg.RFC2136LoadBalancingStrategy, nil) case "ns1": 
p, err = ns1.NewNS1Provider( ns1.NS1Config{ diff --git a/pkg/apis/externaldns/types.go b/pkg/apis/externaldns/types.go index aaa1d1745..423c528e7 100644 --- a/pkg/apis/externaldns/types.go +++ b/pkg/apis/externaldns/types.go @@ -163,7 +163,7 @@ type Config struct { CFUsername string CFPassword string ResolveServiceLoadBalancerHostname bool - RFC2136Host string + RFC2136Host []string RFC2136Port int RFC2136Zone []string RFC2136Insecure bool @@ -177,6 +177,7 @@ type Config struct { RFC2136TSIGSecretAlg string RFC2136TAXFR bool RFC2136MinTTL time.Duration + RFC2136LoadBalancingStrategy string RFC2136BatchChangeSize int RFC2136UseTLS bool RFC2136SkipTLSVerify bool @@ -556,7 +557,7 @@ func App(cfg *Config) *kingpin.Application { app.Flag("exoscale-apisecret", "Provide your API Secret for the Exoscale provider").Default(defaultConfig.ExoscaleAPISecret).StringVar(&cfg.ExoscaleAPISecret) // Flags related to RFC2136 provider - app.Flag("rfc2136-host", "When using the RFC2136 provider, specify the host of the DNS server").Default(defaultConfig.RFC2136Host).StringVar(&cfg.RFC2136Host) + app.Flag("rfc2136-host", "When using the RFC2136 provider, specify the host of the DNS server").Default(defaultConfig.RFC2136Host[0]).StringsVar(&cfg.RFC2136Host) app.Flag("rfc2136-port", "When using the RFC2136 provider, specify the port of the DNS server").Default(strconv.Itoa(defaultConfig.RFC2136Port)).IntVar(&cfg.RFC2136Port) app.Flag("rfc2136-zone", "When using the RFC2136 provider, specify zone entries of the DNS server to use").StringsVar(&cfg.RFC2136Zone) app.Flag("rfc2136-create-ptr", "When using the RFC2136 provider, enable PTR management").Default(strconv.FormatBool(defaultConfig.RFC2136CreatePTR)).BoolVar(&cfg.RFC2136CreatePTR) @@ -573,6 +574,7 @@ func App(cfg *Config) *kingpin.Application { app.Flag("rfc2136-batch-change-size", "When using the RFC2136 provider, set the maximum number of changes that will be applied in each 
batch.").Default(strconv.Itoa(defaultConfig.RFC2136BatchChangeSize)).IntVar(&cfg.RFC2136BatchChangeSize) app.Flag("rfc2136-use-tls", "When using the RFC2136 provider, communicate with name server over tls").BoolVar(&cfg.RFC2136UseTLS) app.Flag("rfc2136-skip-tls-verify", "When using TLS with the RFC2136 provider, disable verification of any TLS certificates").BoolVar(&cfg.RFC2136SkipTLSVerify) + app.Flag("rfc2136-load-balancing-strategy", "When using the RFC2136 provider, specify the load balancing strategy (default: disabled, options: random, round-robin, disabled)").Default(defaultConfig.RFC2136LoadBalancingStrategy).EnumVar(&cfg.RFC2136LoadBalancingStrategy, "random", "round-robin", "disabled") // Flags related to TransIP provider app.Flag("transip-account", "When using the TransIP provider, specify the account name (required when --provider=transip)").Default(defaultConfig.TransIPAccountName).StringVar(&cfg.TransIPAccountName) diff --git a/pkg/apis/externaldns/types_test.go b/pkg/apis/externaldns/types_test.go index 066c05da3..ca2db5071 100644 --- a/pkg/apis/externaldns/types_test.go +++ b/pkg/apis/externaldns/types_test.go @@ -171,11 +171,18 @@ var ( AWSProfiles: []string{"profile1", "profile2"}, AWSZoneCacheDuration: 10 * time.Second, AWSSDServiceCleanup: true, - AWSSDCreateTag: map[string]string{"key1": "value1", "key2": "value2"}, AWSDynamoDBTable: "custom-table", AzureConfigFile: "azure.json", AzureResourceGroup: "arg", AzureSubscriptionID: "arg", + BluecatDNSConfiguration: "arg", + BluecatDNSServerName: "arg", + BluecatConfigFile: "bluecat.json", + BluecatDNSView: "arg", + BluecatGatewayHost: "arg", + BluecatRootZone: "arg", + BluecatDNSDeployType: "full-deploy", + BluecatSkipTLSVerify: true, CloudflareProxied: true, CloudflareDNSRecordsPerPage: 5000, CloudflareRegionKey: "us", @@ -193,7 +200,6 @@ var ( OVHEndpoint: "ovh-ca", OVHApiRateLimit: 42, PDNSServer: "http://ns.example.com:8081", - PDNSServerID: "localhost", PDNSAPIKey: "some-secret-key", 
PDNSSkipTLSVerify: true, TLSCA: "/path/to/ca.crt", @@ -221,6 +227,7 @@ var ( ExoscaleAPISecret: "2", CRDSourceAPIVersion: "test.k8s.io/v1alpha1", CRDSourceKind: "Endpoint", + RcodezeroTXTEncrypt: true, NS1Endpoint: "https://api.example.com/v1", NS1IgnoreSSL: true, TransIPAccountName: "transip", @@ -371,6 +378,9 @@ func TestParseFlags(t *testing.T) { "--managed-record-types=CNAME", "--managed-record-types=NS", "--rfc2136-batch-change-size=100", + "--rfc2136-load-balancing-strategy=round-robin", + "--rfc2136-host=rfc2136-host1", + "--rfc2136-host=rfc2136-host2", "--ibmcloud-proxied", "--ibmcloud-config-file=ibmcloud.json", "--tencent-cloud-config-file=tencent-cloud.json", @@ -483,6 +493,8 @@ func TestParseFlags(t *testing.T) { "EXTERNAL_DNS_DIGITALOCEAN_API_PAGE_SIZE": "100", "EXTERNAL_DNS_MANAGED_RECORD_TYPES": "A\nAAAA\nCNAME\nNS", "EXTERNAL_DNS_RFC2136_BATCH_CHANGE_SIZE": "100", + "EXTERNAL_DNS_RFC2136_LOAD_BALANCING_STRATEGY": "round-robin", + "EXTERNAL_DNS_RFC2136_HOST": "rfc2136-host1\nrfc2136-host2", "EXTERNAL_DNS_IBMCLOUD_PROXIED": "1", "EXTERNAL_DNS_IBMCLOUD_CONFIG_FILE": "ibmcloud.json", "EXTERNAL_DNS_TENCENT_CLOUD_CONFIG_FILE": "tencent-cloud.json", @@ -523,6 +535,7 @@ func restoreEnv(t *testing.T, originalEnv map[string]string) { func TestPasswordsNotLogged(t *testing.T) { cfg := Config{ + DynPassword: "dyn-pass", PDNSAPIKey: "pdns-api-key", RFC2136TSIGSecret: "tsig-secret", } diff --git a/provider/rfc2136/rfc2136.go b/provider/rfc2136/rfc2136.go index ba5828212..40ec52b34 100644 --- a/provider/rfc2136/rfc2136.go +++ b/provider/rfc2136/rfc2136.go @@ -20,10 +20,12 @@ import ( "context" "crypto/tls" "fmt" + "math/rand" "net" "sort" "strconv" "strings" + "sync" "time" "github.com/bodgit/tsig" @@ -47,7 +49,7 @@ const ( // rfc2136 provider type type rfc2136Provider struct { provider.BaseProvider - nameserver string + nameservers []string zoneNames []string tsigKeyName string tsigSecret string @@ -69,6 +71,22 @@ type rfc2136Provider struct { domainFilter 
endpoint.DomainFilter dryRun bool actions rfc2136Actions + + // Counter for load balancing, and error handling + counter int + mu sync.Mutex // Mutex for thread-safe counter + + // Load balancing strategy "round-robin", "random", or "disabled" + loadBalancingStrategy string + + // Random number generator for random load balancing + randGen *rand.Rand + + // Store TSIG credentials for each nameserver + credentials map[string]*gss.Client + + // Last error encountered + lastErr error } // TLSConfig is comprised of the TLS-related fields necessary if we are using DNS over TLS @@ -78,7 +96,6 @@ type TLSConfig struct { CAFilePath string ClientCertFilePath string ClientCertKeyFilePath string - ServerName string } // Map of supported TSIG algorithms @@ -92,11 +109,11 @@ var tsigAlgs = map[string]string{ type rfc2136Actions interface { SendMessage(msg *dns.Msg) error - IncomeTransfer(m *dns.Msg, a string) (env chan *dns.Envelope, err error) + IncomeTransfer(m *dns.Msg, nameserver string) (env chan *dns.Envelope, err error) } // NewRfc2136Provider is a factory function for OpenStack rfc2136 providers -func NewRfc2136Provider(host string, port int, zoneNames []string, insecure bool, keyName string, secret string, secretAlg string, axfr bool, domainFilter endpoint.DomainFilter, dryRun bool, minTTL time.Duration, createPTR bool, gssTsig bool, krb5Username string, krb5Password string, krb5Realm string, batchChangeSize int, tlsConfig TLSConfig, actions rfc2136Actions) (provider.Provider, error) { +func NewRfc2136Provider(hosts []string, port int, zoneNames []string, insecure bool, keyName string, secret string, secretAlg string, axfr bool, domainFilter endpoint.DomainFilter, dryRun bool, minTTL time.Duration, createPTR bool, gssTsig bool, krb5Username string, krb5Password string, krb5Realm string, batchChangeSize int, tlsConfig TLSConfig, loadBalancingStrategy string, actions rfc2136Actions) (provider.Provider, error) { secretAlgChecked, ok := tsigAlgs[secretAlg] if !ok && 
!insecure && !gssTsig { return nil, errors.Errorf("%s is not supported TSIG algorithm", secretAlg) @@ -112,25 +129,32 @@ func NewRfc2136Provider(host string, port int, zoneNames []string, insecure bool return len(strings.Split(zoneNames[i], ".")) > len(strings.Split(zoneNames[j], ".")) }) - if tlsConfig.UseTLS { - tlsConfig.ServerName = host + var nameservers []string + for _, host := range hosts { + host = net.JoinHostPort(host, strconv.Itoa(port)) + nameservers = append(nameservers, host) } r := &rfc2136Provider{ - nameserver: net.JoinHostPort(host, strconv.Itoa(port)), - zoneNames: zoneNames, - insecure: insecure, - gssTsig: gssTsig, - createPTR: createPTR, - krb5Username: krb5Username, - krb5Password: krb5Password, - krb5Realm: strings.ToUpper(krb5Realm), - domainFilter: domainFilter, - dryRun: dryRun, - axfr: axfr, - minTTL: minTTL, - batchChangeSize: batchChangeSize, - tlsConfig: tlsConfig, + nameservers: nameservers, + zoneNames: zoneNames, + insecure: insecure, + gssTsig: gssTsig, + createPTR: createPTR, + krb5Username: krb5Username, + krb5Password: krb5Password, + krb5Realm: strings.ToUpper(krb5Realm), + domainFilter: domainFilter, + dryRun: dryRun, + axfr: axfr, + minTTL: minTTL, + batchChangeSize: batchChangeSize, + tlsConfig: tlsConfig, + loadBalancingStrategy: loadBalancingStrategy, + randGen: rand.New(rand.NewSource(time.Now().UnixNano())), + credentials: make(map[string]*gss.Client), + counter: 0, + lastErr: nil, } if actions != nil { r.actions = actions @@ -144,24 +168,35 @@ func NewRfc2136Provider(host string, port int, zoneNames []string, insecure bool r.tsigSecretAlg = secretAlgChecked } - log.Infof("Configured RFC2136 with zone '%s' and nameserver '%s'", r.zoneNames, r.nameserver) + log.Infof("Configured RFC2136 with zones '%v' and nameservers '%v'", r.zoneNames, hosts) return r, nil } -// KeyName will return TKEY name and TSIG handle to use for followon actions with a secure connection -func (r rfc2136Provider) KeyData() (keyName string, handle 
*gss.Client, err error) { +// KeyData will return TKEY name and TSIG handle to use for followon actions with a secure connection +func (r *rfc2136Provider) KeyData(nameserver string) (keyName string, handle *gss.Client, err error) { + // Check if we already have credentials for this nameserver + if existingHandle, ok := r.credentials[nameserver]; ok { + return nameserver, existingHandle, nil + } + handle, err = gss.NewClient(new(dns.Client)) if err != nil { return keyName, handle, err } - keyName, _, err = handle.NegotiateContextWithCredentials(r.nameserver, r.krb5Realm, r.krb5Username, r.krb5Password) + keyName, _, err = handle.NegotiateContextWithCredentials(nameserver, r.krb5Realm, r.krb5Username, r.krb5Password) + if err != nil { + return keyName, handle, err + } - return keyName, handle, err + // Store the credentials for this nameserver + r.credentials[nameserver] = handle + + return keyName, handle, nil } // Records returns the list of records. -func (r rfc2136Provider) Records(ctx context.Context) ([]*endpoint.Endpoint, error) { +func (r *rfc2136Provider) Records(ctx context.Context) ([]*endpoint.Endpoint, error) { rrs, err := r.List() if err != nil { return nil, err @@ -224,25 +259,25 @@ OuterLoop: return eps, nil } -func (r rfc2136Provider) IncomeTransfer(m *dns.Msg, a string) (env chan *dns.Envelope, err error) { +func (r *rfc2136Provider) IncomeTransfer(m *dns.Msg, nameserver string) (env chan *dns.Envelope, err error) { t := new(dns.Transfer) if !r.insecure && !r.gssTsig { t.TsigSecret = map[string]string{r.tsigKeyName: r.tsigSecret} } - c, err := makeClient(r) + c, err := makeClient(r, nameserver) if err != nil { return nil, fmt.Errorf("error setting up TLS: %w", err) } - conn, err := c.Dial(a) + conn, err := c.Dial(nameserver) if err != nil { return nil, fmt.Errorf("failed to connect for transfer: %w", err) } t.Conn = conn - return t.In(m, r.nameserver) + return t.In(m, nameserver) } -func (r rfc2136Provider) List() ([]dns.RR, error) { +func (r 
*rfc2136Provider) List() ([]dns.RR, error) { if !r.axfr { log.Debug("axfr is disabled") return make([]dns.RR, 0), nil @@ -258,38 +293,55 @@ func (r rfc2136Provider) List() ([]dns.RR, error) { m.SetTsig(r.tsigKeyName, r.tsigSecretAlg, clockSkew, time.Now().Unix()) } - env, err := r.actions.IncomeTransfer(m, r.nameserver) - if err != nil { - return nil, fmt.Errorf("failed to fetch records via AXFR: %w", err) - } + var lastErr error + for i := 0; i < len(r.nameservers); i++ { + nameserver := r.getNextNameserver() + log.Debugf("Fetching records from nameserver: %s", nameserver) - for e := range env { - if e.Error != nil { - if e.Error == dns.ErrSoa { - log.Error("AXFR error: unexpected response received from the server") - } else { - log.Errorf("AXFR error: %v", e.Error) - } + env, err := r.actions.IncomeTransfer(m, nameserver) + if err != nil { + lastErr = fmt.Errorf("failed to fetch records via AXFR: %w", err) + r.lastErr = lastErr continue } - records = append(records, e.RR...) + + for e := range env { + if e.Error != nil { + if e.Error == dns.ErrSoa { + log.Error("AXFR error: unexpected response received from the server") + } else { + log.Errorf("AXFR error: %v", e.Error) + } + continue + } + records = append(records, e.RR...) 
+ } + // If records were fetched successfully, break out of the loop + if len(records) > 0 { + return records, nil + } + } + + if lastErr != nil { + r.lastErr = lastErr + return nil, lastErr } } return records, nil } -func (r rfc2136Provider) AddReverseRecord(ip string, hostname string) error { +func (r *rfc2136Provider) AddReverseRecord(ip string, hostname string) error { changes := r.GenerateReverseRecord(ip, hostname) return r.ApplyChanges(context.Background(), &plan.Changes{Create: changes}) } -func (r rfc2136Provider) RemoveReverseRecord(ip string, hostname string) error { +func (r *rfc2136Provider) RemoveReverseRecord(ip string, hostname string) error { changes := r.GenerateReverseRecord(ip, hostname) return r.ApplyChanges(context.Background(), &plan.Changes{Delete: changes}) } -func (r rfc2136Provider) GenerateReverseRecord(ip string, hostname string) []*endpoint.Endpoint { +func (r *rfc2136Provider) GenerateReverseRecord(ip string, hostname string) []*endpoint.Endpoint { // Find the zone for the PTR record // zone := findMsgZone(&endpoint.Endpoint{DNSName: ip}, p.ptrZoneNames) // Generate PTR notation record starting from the IP address @@ -309,7 +361,7 @@ func (r rfc2136Provider) GenerateReverseRecord(ip string, hostname string) []*en } // ApplyChanges applies a given set of changes in a given zone. 
-func (r rfc2136Provider) ApplyChanges(ctx context.Context, changes *plan.Changes) error { +func (r *rfc2136Provider) ApplyChanges(ctx context.Context, changes *plan.Changes) error { log.Debugf("ApplyChanges (Create: %d, UpdateOld: %d, UpdateNew: %d, Delete: %d)", len(changes.Create), len(changes.UpdateOld), len(changes.UpdateNew), len(changes.Delete)) var errors []error @@ -435,7 +487,7 @@ func (r rfc2136Provider) ApplyChanges(ctx context.Context, changes *plan.Changes return nil } -func (r rfc2136Provider) UpdateRecord(m *dns.Msg, oldEp *endpoint.Endpoint, newEp *endpoint.Endpoint) error { +func (r *rfc2136Provider) UpdateRecord(m *dns.Msg, oldEp *endpoint.Endpoint, newEp *endpoint.Endpoint) error { err := r.RemoveRecord(m, oldEp) if err != nil { return err @@ -444,7 +496,7 @@ func (r rfc2136Provider) UpdateRecord(m *dns.Msg, oldEp *endpoint.Endpoint, newE return r.AddRecord(m, newEp) } -func (r rfc2136Provider) AddRecord(m *dns.Msg, ep *endpoint.Endpoint) error { +func (r *rfc2136Provider) AddRecord(m *dns.Msg, ep *endpoint.Endpoint) error { log.Debugf("AddRecord.ep=%s", ep) ttl := int64(r.minTTL.Seconds()) @@ -467,7 +519,7 @@ func (r rfc2136Provider) AddRecord(m *dns.Msg, ep *endpoint.Endpoint) error { return nil } -func (r rfc2136Provider) RemoveRecord(m *dns.Msg, ep *endpoint.Endpoint) error { +func (r *rfc2136Provider) RemoveRecord(m *dns.Msg, ep *endpoint.Endpoint) error { log.Debugf("RemoveRecord.ep=%s", ep) for _, target := range ep.Targets { newRR := fmt.Sprintf("%s %d %s %s", ep.DNSName, ep.RecordTTL, ep.RecordType, target) @@ -484,51 +536,111 @@ func (r rfc2136Provider) RemoveRecord(m *dns.Msg, ep *endpoint.Endpoint) error { return nil } -func (r rfc2136Provider) SendMessage(msg *dns.Msg) error { +func (r *rfc2136Provider) getNextNameserver() string { + if len(r.nameservers) == 1 { + return r.nameservers[0] + } + + r.mu.Lock() + defer r.mu.Unlock() + + if r.lastErr != nil { + log.Warnf("Last operation failed for nameserver %s", 
r.nameservers[r.counter]) + log.Warnf("Last operation error message: %v", r.lastErr) + } + + var nameserver string + switch r.loadBalancingStrategy { + case "random": + for { + nameserver = r.nameservers[r.randGen.Intn(len(r.nameservers))] + // Ensure that we don't get the same nameserver as the last one + if nameserver != r.nameservers[r.counter] { + break + } + } + case "round-robin": + nameserver = r.nameservers[r.counter] + r.counter = (r.counter + 1) % len(r.nameservers) + default: + if r.lastErr != nil { + r.counter = (r.counter + 1) % len(r.nameservers) + nameserver = r.nameservers[r.counter] + } else { + nameserver = r.nameservers[r.counter] + } + } + + // Last error has been logged, reset it for the next operation + r.lastErr = nil + return nameserver +} + +func (r *rfc2136Provider) SendMessage(msg *dns.Msg) error { if r.dryRun { log.Debugf("SendMessage.skipped") return nil } log.Debugf("SendMessage") - c, err := makeClient(r) - if err != nil { - return fmt.Errorf("error setting up TLS: %w", err) - } + var lastErr error + for i := 0; i < len(r.nameservers); i++ { + nameserver := r.getNextNameserver() + log.Debugf("Sending message to nameserver: %s", nameserver) - if !r.insecure { - if r.gssTsig { - keyName, handle, err := r.KeyData() - if err != nil { - return err + c, err := makeClient(r, nameserver) + if err != nil { + lastErr = fmt.Errorf("error setting up TLS: %w", err) + r.lastErr = lastErr + continue + } + + if !r.insecure { + if r.gssTsig { + keyName, handle, err := r.KeyData(nameserver) + if err != nil { + lastErr = err + r.lastErr = lastErr + continue + } + defer handle.Close() + defer handle.DeleteContext(keyName) + + c.TsigProvider = handle + + msg.SetTsig(keyName, tsig.GSS, clockSkew, time.Now().Unix()) + } else { + c.TsigProvider = tsig.HMAC{r.tsigKeyName: r.tsigSecret} + msg.SetTsig(r.tsigKeyName, r.tsigSecretAlg, clockSkew, time.Now().Unix()) } - defer handle.Close() - defer handle.DeleteContext(keyName) - - c.TsigProvider = handle - - 
msg.SetTsig(keyName, tsig.GSS, clockSkew, time.Now().Unix()) - } else { - c.TsigProvider = tsig.HMAC{r.tsigKeyName: r.tsigSecret} - msg.SetTsig(r.tsigKeyName, r.tsigSecretAlg, clockSkew, time.Now().Unix()) } - } - resp, _, err := c.Exchange(msg, r.nameserver) - if err != nil { + resp, _, err := c.Exchange(msg, nameserver) + if err != nil { + if resp != nil && resp.Rcode != dns.RcodeSuccess { + log.Infof("error in dns.Client.Exchange: %s", err) + lastErr = err + r.lastErr = lastErr + continue + } + log.Warnf("warn in dns.Client.Exchange: %s", err) + lastErr = err + r.lastErr = lastErr + continue + } if resp != nil && resp.Rcode != dns.RcodeSuccess { - log.Infof("error in dns.Client.Exchange: %s", err) - return err + log.Infof("Bad dns.Client.Exchange response: %s", resp) + lastErr = fmt.Errorf("bad return code: %s", dns.RcodeToString[resp.Rcode]) + r.lastErr = lastErr + continue } - log.Warnf("warn in dns.Client.Exchange: %s", err) - } - if resp != nil && resp.Rcode != dns.RcodeSuccess { - log.Infof("Bad dns.Client.Exchange response: %s", resp) - return fmt.Errorf("bad return code: %s", dns.RcodeToString[resp.Rcode]) + + log.Debugf("SendMessage.success") + return nil } - log.Debugf("SendMessage.success") - return nil + r.lastErr = lastErr + return lastErr } func chunkBy(slice []*endpoint.Endpoint, chunkSize int) [][]*endpoint.Endpoint { @@ -558,9 +670,12 @@ func findMsgZone(ep *endpoint.Endpoint, zoneNames []string) string { return dns.Fqdn(".") } -func makeClient(r rfc2136Provider) (result *dns.Client, err error) { +func makeClient(r *rfc2136Provider, nameserver string) (*dns.Client, error) { c := new(dns.Client) + // Remove port from nameserver + nameserver = strings.Split(nameserver, ":")[0] + if r.tlsConfig.UseTLS { log.Debug("RFC2136 Connecting via TLS") c.Net = "tcp-tls" @@ -568,7 +683,7 @@ func makeClient(r rfc2136Provider) (result *dns.Client, err error) { r.tlsConfig.ClientCertFilePath, r.tlsConfig.ClientCertKeyFilePath, r.tlsConfig.CAFilePath, - 
r.tlsConfig.ServerName, + nameserver, // Use the current nameserver r.tlsConfig.SkipTLSVerify, // Per RFC9103 tls.VersionTLS13, diff --git a/provider/rfc2136/rfc2136_test.go b/provider/rfc2136/rfc2136_test.go index efaa79d18..8bdff70cd 100644 --- a/provider/rfc2136/rfc2136_test.go +++ b/provider/rfc2136/rfc2136_test.go @@ -20,9 +20,11 @@ import ( "context" "crypto/tls" "fmt" + "math/rand" "os" "regexp" "sort" + "strconv" "strings" "testing" "time" @@ -37,16 +39,53 @@ import ( ) type rfc2136Stub struct { - output []*dns.Envelope - updateMsgs []*dns.Msg - createMsgs []*dns.Msg + output []*dns.Envelope + updateMsgs []*dns.Msg + createMsgs []*dns.Msg + nameservers []string + counter int + randGen *rand.Rand + lastNameserver string + loadBalancingStrategy string } func newStub() *rfc2136Stub { return &rfc2136Stub{ - output: make([]*dns.Envelope, 0), - updateMsgs: make([]*dns.Msg, 0), - createMsgs: make([]*dns.Msg, 0), + output: make([]*dns.Envelope, 0), + updateMsgs: make([]*dns.Msg, 0), + createMsgs: make([]*dns.Msg, 0), + nameservers: []string{""}, + randGen: rand.New(rand.NewSource(time.Now().UnixNano())), + loadBalancingStrategy: "round-robin", + } +} + +func newStubLB(strategy string, nameservers []string) *rfc2136Stub { + return &rfc2136Stub{ + output: make([]*dns.Envelope, 0), + updateMsgs: make([]*dns.Msg, 0), + createMsgs: make([]*dns.Msg, 0), + nameservers: nameservers, + randGen: rand.New(rand.NewSource(time.Now().UnixNano())), + loadBalancingStrategy: strategy, + } +} + +func (r *rfc2136Stub) getNextNameserver() string { + if len(r.nameservers) == 1 { + return r.nameservers[0] + } + + switch r.loadBalancingStrategy { + case "random": + return r.nameservers[r.randGen.Intn(len(r.nameservers))] + case "round-robin": + nameserver := r.nameservers[r.counter] + r.counter = (r.counter + 1) % len(r.nameservers) + + return nameserver + default: + return r.nameservers[0] } } @@ -61,6 +100,8 @@ func getSortedChanges(msgs []*dns.Msg) []string { } func (r *rfc2136Stub) 
SendMessage(msg *dns.Msg) error { + r.lastNameserver = r.getNextNameserver() + log.Info("Sending message to nameserver: ", r.lastNameserver) zone := extractZoneFromMessage(msg.String()) // Make sure the zone starts with . to make sure HasSuffix does not match forbar.com for zone bar.com if !strings.HasPrefix(zone, ".") { @@ -127,11 +168,26 @@ func createRfc2136StubProvider(stub *rfc2136Stub) (provider.Provider, error) { ClientCertFilePath: "", ClientCertKeyFilePath: "", } - return NewRfc2136Provider("", 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, stub) + return NewRfc2136Provider([]string{""}, 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) +} + +func createRfc2136StubProviderWithHosts(stub *rfc2136Stub) (provider.Provider, error) { + tlsConfig := TLSConfig{ + UseTLS: false, + SkipTLSVerify: false, + CAFilePath: "", + ClientCertFilePath: "", + ClientCertKeyFilePath: "", + } + return NewRfc2136Provider([]string{"rfc2136-host1", "rfc2136-host2", "rfc2136-host3"}, 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) } func createRfc2136TLSStubProvider(stub *rfc2136Stub, tlsConfig TLSConfig) (provider.Provider, error) { - return NewRfc2136Provider("rfc2136-host", 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, stub) + return NewRfc2136Provider([]string{"rfc2136-host"}, 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) +} + +func createRfc2136TLSStubProviderWithHosts(stub *rfc2136Stub, tlsConfig TLSConfig) (provider.Provider, error) { + return 
NewRfc2136Provider([]string{"rfc2136-host1", "rfc2136-host2"}, 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) } func createRfc2136StubProviderWithReverse(stub *rfc2136Stub) (provider.Provider, error) { @@ -144,7 +200,7 @@ func createRfc2136StubProviderWithReverse(stub *rfc2136Stub) (provider.Provider, } zones := []string{"foo.com", "3.2.1.in-addr.arpa"} - return NewRfc2136Provider("", 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{Filters: zones}, false, 300*time.Second, true, false, "", "", "", 50, tlsConfig, stub) + return NewRfc2136Provider([]string{""}, 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{Filters: zones}, false, 300*time.Second, true, false, "", "", "", 50, tlsConfig, "", stub) } func createRfc2136StubProviderWithZones(stub *rfc2136Stub) (provider.Provider, error) { @@ -156,7 +212,7 @@ func createRfc2136StubProviderWithZones(stub *rfc2136Stub) (provider.Provider, e ClientCertKeyFilePath: "", } zones := []string{"foo.com", "foobar.com"} - return NewRfc2136Provider("", 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, stub) + return NewRfc2136Provider([]string{""}, 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) } func createRfc2136StubProviderWithZonesFilters(stub *rfc2136Stub) (provider.Provider, error) { @@ -168,7 +224,18 @@ func createRfc2136StubProviderWithZonesFilters(stub *rfc2136Stub) (provider.Prov ClientCertKeyFilePath: "", } zones := []string{"foo.com", "foobar.com"} - return NewRfc2136Provider("", 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{Filters: zones}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, stub) + return 
NewRfc2136Provider([]string{""}, 0, zones, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{Filters: zones}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, "", stub) +} + +func createRfc2136StubProviderWithStrategy(stub *rfc2136Stub, strategy string) (provider.Provider, error) { + tlsConfig := TLSConfig{ + UseTLS: false, + SkipTLSVerify: false, + CAFilePath: "", + ClientCertFilePath: "", + ClientCertKeyFilePath: "", + } + return NewRfc2136Provider([]string{"rfc2136-host1", "rfc2136-host2", "rfc2136-host3"}, 0, nil, false, "key", "secret", "hmac-sha512", true, endpoint.DomainFilter{}, false, 300*time.Second, false, false, "", "", "", 50, tlsConfig, strategy, stub) } func extractUpdateSectionFromMessage(msg fmt.Stringer) []string { @@ -260,7 +327,7 @@ ouB5ZN+05DzKCQhBekMnygQ= rawProvider := provider.(*rfc2136Provider) - client, err := makeClient(*rawProvider) + client, err := makeClient(rawProvider, rawProvider.nameservers[0]) assert.NoError(t, err) assert.Equal(t, "tcp-tls", client.Net) @@ -270,6 +337,51 @@ ouB5ZN+05DzKCQhBekMnygQ= assert.Equal(t, []string{"dot"}, client.TLSConfig.NextProtos) } +func TestRfc2136TLSConfigWithMultiHosts(t *testing.T) { + stub := newStub() + + caFile, err := os.CreateTemp("", "rfc2136-test-XXXXXXXX.crt") + assert.NoError(t, err) + defer os.Remove(caFile.Name()) + _, err = caFile.Write([]byte( + `-----BEGIN CERTIFICATE----- +MIH+MIGxAhR2n1aQk0ONrQ8QQfa6GCzFWLmTXTAFBgMrZXAwITELMAkGA1UEBhMC +REUxEjAQBgNVBAMMCWxvY2FsaG9zdDAgFw0yMzEwMjQwNzI5NDNaGA8yMTIzMDkz +MDA3Mjk0M1owITELMAkGA1UEBhMCREUxEjAQBgNVBAMMCWxvY2FsaG9zdDAqMAUG +AytlcAMhAA1FzGJXuQdOpKv02SEl7SIA8SP8RVRI0QTi1bUFiFBLMAUGAytlcANB +ADiCKRUGDMyafSSYhl0KXoiXrFOxvhrGM5l15L4q82JM5Qb8wv0gNrnbGTZlInuv +ouB5ZN+05DzKCQhBekMnygQ= +-----END CERTIFICATE----- +`)) + + tlsConfig := TLSConfig{ + UseTLS: true, + SkipTLSVerify: false, + CAFilePath: caFile.Name(), + ClientCertFilePath: "", + ClientCertKeyFilePath: "", + } + + provider, err := 
createRfc2136TLSStubProviderWithHosts(stub, tlsConfig) + assert.NoError(t, err) + + rawProvider := provider.(*rfc2136Provider) + + for _, ns := range rawProvider.nameservers { + client, err := makeClient(rawProvider, ns) + assert.NoError(t, err) + + // strip port from ns + ns = strings.Split(ns, ":")[0] + + assert.Equal(t, "tcp-tls", client.Net) + assert.Equal(t, false, client.TLSConfig.InsecureSkipVerify) + assert.Equal(t, ns, client.TLSConfig.ServerName) + assert.Equal(t, uint16(tls.VersionTLS13), client.TLSConfig.MinVersion) + assert.Equal(t, []string{"dot"}, client.TLSConfig.NextProtos) + } +} + func TestRfc2136TLSConfigNoVerify(t *testing.T) { stub := newStub() @@ -300,7 +412,7 @@ ouB5ZN+05DzKCQhBekMnygQ= rawProvider := provider.(*rfc2136Provider) - client, err := makeClient(*rawProvider) + client, err := makeClient(rawProvider, rawProvider.nameservers[0]) assert.NoError(t, err) assert.Equal(t, "tcp-tls", client.Net) @@ -369,7 +481,7 @@ hl6aAPCe16pwvljB7yImxLJ+ytWk7OV/s10cmlaczrEtNeUjV1X9MTM= rawProvider := provider.(*rfc2136Provider) - client, err := makeClient(*rawProvider) + client, err := makeClient(rawProvider, rawProvider.nameservers[0]) log.Infof("client, err is: %v", client) log.Infof("client, err is: %s", err) assert.NoError(t, err) @@ -773,3 +885,44 @@ func contains(arr []*endpoint.Endpoint, name string) bool { } return false } + +// TestRoundRobinLoadBalancing tests the round-robin load balancing strategy. 
+func TestRoundRobinLoadBalancing(t *testing.T) { + stub := newStubLB("round-robin", []string{"rfc2136-host1", "rfc2136-host2", "rfc2136-host3"}) + _, err := createRfc2136StubProviderWithHosts(stub) + assert.NoError(t, err) + + m := new(dns.Msg) + m.SetUpdate("foo.com.") + rr, err := dns.NewRR(fmt.Sprintf("%s %d %s %s", "v1.foo.com.", 0, "A", "1.2.3.4")) + m.Insert([]dns.RR{rr}) + + for i := 0; i < 10; i++ { + err := stub.SendMessage(m) + assert.NoError(t, err) + expectedNameserver := "rfc2136-host" + strconv.Itoa((i%3)+1) + assert.Equal(t, expectedNameserver, stub.lastNameserver) + } +} + +// TestRandomLoadBalancing tests the random load balancing strategy. +func TestRandomLoadBalancing(t *testing.T) { + stub := newStubLB("random", []string{"rfc2136-host1", "rfc2136-host2", "rfc2136-host3"}) + _, err := createRfc2136StubProvider(stub) + assert.NoError(t, err) + + m := new(dns.Msg) + m.SetUpdate("foo.com.") + rr, err := dns.NewRR(fmt.Sprintf("%s %d %s %s", "v1.foo.com.", 0, "A", "1.2.3.4")) + m.Insert([]dns.RR{rr}) + + nameserverCounts := map[string]int{} + + for i := 0; i < 25; i++ { + err := stub.SendMessage(m) + assert.NoError(t, err) + nameserverCounts[stub.lastNameserver]++ + } + + assert.Greater(t, len(nameserverCounts), 1, "Expected multiple nameservers to be used in random strategy") +}