mirror of
https://github.com/siderolabs/talos.git
synced 2025-08-17 20:17:03 +02:00
fix: prevent endless loop with DHCP requests in networkd
There were two problems: * `configureInterfaces` was always failing if interface is already set up, as the routes already exist * `renew` was halving the renew interval each time `configureInterface` fails, which starts at (LeaseTime/2) and goes effectively to zero This was leading to high networkd CPU usage, storm of DHCP requests on the network. Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
This commit is contained in:
parent
71063c4a6e
commit
360d887967
@ -46,9 +46,11 @@ func (d *DHCP) Link() *net.Interface {
|
||||
// Discover handles the DHCP client exchange stores the DHCP Ack.
|
||||
func (d *DHCP) Discover(ctx context.Context, link *net.Interface) error {
|
||||
d.NetIf = link
|
||||
// TODO do something with context
|
||||
ack, err := d.discover()
|
||||
d.Ack = ack
|
||||
ack, err := d.discover(ctx)
|
||||
|
||||
if ack != nil {
|
||||
d.Ack = ack
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
@ -191,7 +193,7 @@ func (d *DHCP) Hostname() (hostname string) {
|
||||
}
|
||||
|
||||
// discover handles the actual DHCP conversation.
|
||||
func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) {
|
||||
func (d *DHCP) discover(ctx context.Context) (*dhcpv4.DHCPv4, error) {
|
||||
opts := []dhcpv4.OptionCode{
|
||||
dhcpv4.OptionClasslessStaticRoute,
|
||||
dhcpv4.OptionDomainNameServer,
|
||||
@ -223,7 +225,7 @@ func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) {
|
||||
// nolint: errcheck
|
||||
defer cli.Close()
|
||||
|
||||
lease, err := cli.Request(context.Background(), mods...)
|
||||
lease, err := cli.Request(ctx, mods...)
|
||||
if err != nil {
|
||||
// TODO: Make this a well defined error so we can make it not fatal
|
||||
log.Printf("failed dhcp request for %q: %v", d.NetIf.Name, err)
|
||||
@ -231,5 +233,21 @@ func (d *DHCP) discover() (*dhcpv4.DHCPv4, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Printf("DHCP ACK on %q: %s", d.NetIf.Name, collapseSummary(lease.ACK.Summary()))
|
||||
|
||||
return lease.ACK, err
|
||||
}
|
||||
|
||||
func collapseSummary(summary string) string {
|
||||
lines := strings.Split(summary, "\n")[1:]
|
||||
|
||||
for i := range lines {
|
||||
lines[i] = strings.TrimSpace(lines[i])
|
||||
}
|
||||
|
||||
if len(lines) > 0 && lines[len(lines)-1] == "" {
|
||||
lines = lines[:len(lines)-1]
|
||||
}
|
||||
|
||||
return strings.Join(lines, ", ")
|
||||
}
|
||||
|
@ -48,6 +48,10 @@ func (n *NetworkInterface) setMTU(idx int, mtu uint32) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if msg.Attributes != nil && msg.Attributes.MTU == mtu {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = n.rtConn.Link.Set(&rtnetlink.LinkMessage{
|
||||
Family: msg.Family,
|
||||
Type: msg.Type,
|
||||
|
@ -312,18 +312,26 @@ func (n *NetworkInterface) Renew() {
|
||||
// address TTL. If that fails, we'll continue to attempt to retry every
|
||||
// halflife.
|
||||
func (n *NetworkInterface) renew(method address.Addressing) {
|
||||
const minRenewDuration = 5 * time.Second // protect from renewing too often
|
||||
|
||||
renewDuration := method.TTL() / 2
|
||||
|
||||
var err error
|
||||
|
||||
for {
|
||||
<-time.After(renewDuration)
|
||||
time.Sleep(renewDuration)
|
||||
|
||||
if err = n.configureInterface(method, n.Link); err != nil {
|
||||
log.Printf("failure to renew address for %q: %s", n.Name, err)
|
||||
|
||||
renewDuration = (renewDuration / 2)
|
||||
} else {
|
||||
renewDuration = method.TTL() / 2
|
||||
}
|
||||
|
||||
if renewDuration < minRenewDuration {
|
||||
renewDuration = minRenewDuration
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -337,7 +345,7 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n
|
||||
|
||||
// Set link MTU in any case
|
||||
if err = n.setMTU(method.Link().Index, method.MTU()); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("error setting MTU %d on %q: %w", method.MTU(), n.Name, err)
|
||||
}
|
||||
|
||||
if discoverErr != nil {
|
||||
@ -368,7 +376,7 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n
|
||||
switch err := err.(type) {
|
||||
case *netlink.OpError:
|
||||
if !os.IsExist(err.Err) && err.Err != syscall.ESRCH {
|
||||
return err
|
||||
return fmt.Errorf("error adding address %s on %q: %w", method.Address(), n.Name, err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("failed to add address (already exists) %+v to %s: %v", method.Address(), method.Link().Name, err)
|
||||
@ -401,7 +409,10 @@ func (n *NetworkInterface) configureInterface(method address.Addressing, link *n
|
||||
|
||||
err = n.rtnlConn.RouteAdd(method.Link(), *r.Destination, gw, rtnl.WithRouteSrc(src), rtnl.WithRouteAttrs(attr))
|
||||
if err != nil {
|
||||
return err
|
||||
// ignore "EEXIST" errors for routes which are already present
|
||||
if opErr, ok := err.(*netlink.OpError); !ok || !os.IsExist(opErr.Err) {
|
||||
return fmt.Errorf("error adding route %s %s on %q: %s", *r.Destination, gw, n.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ func handler(serverIP net.IP, statePath string) server4.Handler {
|
||||
dhcpv4.WithOption(dhcpv4.OptHostName(match.Hostname)),
|
||||
dhcpv4.WithOption(dhcpv4.OptDNS(match.Nameservers...)),
|
||||
dhcpv4.WithOption(dhcpv4.OptRouter(match.Gateway)),
|
||||
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(time.Hour)),
|
||||
dhcpv4.WithOption(dhcpv4.OptIPAddressLeaseTime(5*time.Minute)),
|
||||
dhcpv4.WithOption(dhcpv4.OptServerIdentifier(serverIP)),
|
||||
)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user