From 360d8879679dbeeea38fb2326e63b12b22ff6ccd Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 1 Dec 2020 17:49:57 +0300 Subject: [PATCH] fix: prevent endless loop with DHCP requests in networkd There were two problems: * `configureInterfaces` was always failing if interface is already set up, as the routes already exist * `renew` was halving the renew interval each time `configureInterface` fails, which starts at (LeaseTime/2) and goes effectively to zero This was leading to high networkd CPU usage, storm of DHCP requests on the network. Signed-off-by: Andrey Smirnov --- internal/app/networkd/pkg/address/dhcp.go | 28 +++++++++++++++++++---- internal/app/networkd/pkg/nic/netlink.go | 4 ++++ internal/app/networkd/pkg/nic/nic.go | 19 +++++++++++---- pkg/provision/providers/vm/dhcpd.go | 2 +- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/internal/app/networkd/pkg/address/dhcp.go b/internal/app/networkd/pkg/address/dhcp.go index ae838aa9e..e45052707 100644 --- a/internal/app/networkd/pkg/address/dhcp.go +++ b/internal/app/networkd/pkg/address/dhcp.go @@ -46,9 +46,11 @@ func (d *DHCP) Link() *net.Interface { // Discover handles the DHCP client exchange stores the DHCP Ack. func (d *DHCP) Discover(ctx context.Context, link *net.Interface) error { d.NetIf = link - // TODO do something with context - ack, err := d.discover() - d.Ack = ack + ack, err := d.discover(ctx) + + if ack != nil { + d.Ack = ack + } return err } @@ -191,7 +193,7 @@ func (d *DHCP) Hostname() (hostname string) { } // discover handles the actual DHCP conversation. -func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) { +func (d *DHCP) discover(ctx context.Context) (*dhcpv4.DHCPv4, error) { opts := []dhcpv4.OptionCode{ dhcpv4.OptionClasslessStaticRoute, dhcpv4.OptionDomainNameServer, @@ -223,7 +225,7 @@ func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) { // nolint: errcheck defer cli.Close() - lease, err := cli.Request(context.Background(), mods...) + lease, err := cli.Request(ctx, mods...) if err != nil { // TODO: Make this a well defined error so we can make it not fatal log.Printf("failed dhcp request for %q: %v", d.NetIf.Name, err) @@ -231,5 +233,21 @@ func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) { return nil, err } + log.Printf("DHCP ACK on %q: %s", d.NetIf.Name, collapseSummary(lease.ACK.Summary())) + return lease.ACK, err } + +func collapseSummary(summary string) string { + lines := strings.Split(summary, "\n")[1:] + + for i := range lines { + lines[i] = strings.TrimSpace(lines[i]) + } + + if len(lines) > 0 && lines[len(lines)-1] == "" { + lines = lines[:len(lines)-1] + } + + return strings.Join(lines, ", ") +} diff --git a/internal/app/networkd/pkg/nic/netlink.go b/internal/app/networkd/pkg/nic/netlink.go index d651bdb6f..5bb2ee1c7 100644 --- a/internal/app/networkd/pkg/nic/netlink.go +++ b/internal/app/networkd/pkg/nic/netlink.go @@ -48,6 +48,10 @@ func (n *NetworkInterface) setMTU(idx int, mtu uint32) error { return err } + if msg.Attributes != nil && msg.Attributes.MTU == mtu { + return nil + } + err = n.rtConn.Link.Set(&rtnetlink.LinkMessage{ Family: msg.Family, Type: msg.Type, diff --git a/internal/app/networkd/pkg/nic/nic.go b/internal/app/networkd/pkg/nic/nic.go index 97af6201d..b05dd6d16 100644 --- a/internal/app/networkd/pkg/nic/nic.go +++ b/internal/app/networkd/pkg/nic/nic.go @@ -312,18 +312,26 @@ func (n *NetworkInterface) Renew() { // address TTL. If that fails, we'll continue to attempt to retry every // halflife. func (n *NetworkInterface) renew(method address.Addressing) { + const minRenewDuration = 5 * time.Second // protect from renewing too often + renewDuration := method.TTL() / 2 var err error for { - <-time.After(renewDuration) + time.Sleep(renewDuration) if err = n.configureInterface(method, n.Link); err != nil { + log.Printf("failure to renew address for %q: %s", n.Name, err) + renewDuration = (renewDuration / 2) } else { renewDuration = method.TTL() / 2 } + + if renewDuration < minRenewDuration { + renewDuration = minRenewDuration + } } } @@ -337,7 +345,7 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n // Set link MTU in any case if err = n.setMTU(method.Link().Index, method.MTU()); err != nil { - return err + return fmt.Errorf("error setting MTU %d on %q: %w", method.MTU(), n.Name, err) } if discoverErr != nil { @@ -368,7 +376,7 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n switch err := err.(type) { case *netlink.OpError: if !os.IsExist(err.Err) && err.Err != syscall.ESRCH { - return err + return fmt.Errorf("error adding address %s on %q: %w", method.Address(), n.Name, err) } default: return fmt.Errorf("failed to add address (already exists) %+v to %s: %v", method.Address(), method.Link().Name, err) @@ -401,7 +409,10 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n err = n.rtnlConn.RouteAdd(method.Link(), *r.Destination, gw, rtnl.WithRouteSrc(src), rtnl.WithRouteAttrs(attr)) if err != nil { - return err + // ignore "EEXIST" errors for routes which are already present + if opErr, ok := err.(*netlink.OpError); !ok || !os.IsExist(opErr.Err) { + return fmt.Errorf("error adding route %s %s on %q: %s", *r.Destination, gw, n.Name, err) + } } } diff --git a/pkg/provision/providers/vm/dhcpd.go b/pkg/provision/providers/vm/dhcpd.go index d41fc4a98..ca18786e6 100644 --- a/pkg/provision/providers/vm/dhcpd.go +++ b/pkg/provision/providers/vm/dhcpd.go @@ -52,7 +52,7 @@ func handler(serverIP net.IP, statePath string) server4.Handler { dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)), dhcpv4.WithOption(dhcpv4.OptDNS(match.Nameservers...)), dhcpv4.WithOption(dhcpv4.OptRouter(match.Gateway)), - dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(time.Hour)), + dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5*time.Minute)), dhcpv4.WithOption(dhcpv4.OptServerIdentifier(serverIP)), ) if err != nil {