2025-04-23 22:56:24 +02:00

432 lines
15 KiB
Go

package routing
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
"github.com/cloudnativelabs/kube-router/v2/pkg/options"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
gobgpapi "github.com/osrg/gobgp/v3/api"
gobgp "github.com/osrg/gobgp/v3/pkg/server"
v1core "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
)
// syncInternalPeers refreshes the peer relationship with the rest of the nodes in the cluster (iBGP
// peers). Node add/remove events should ensure peer relationship with only currently active nodes. In
// case we miss any events from the API server, this method, which is called periodically, ensures the
// peer relationship with removed nodes is deleted.
//
// For every node returned by the node lister the following are skipped: nodes for which no KRNode can
// be built, this node itself, nodes without the rr-server annotation when we are a route reflector
// client, nodes whose ASN annotation is missing, invalid or different from ours when full mesh mode is
// off, and nodes whose primary IP uses a different IP family than our primary IP. Every remaining node
// is recorded in nrc.activeNodes and added as a BGP peer. Afterwards every entry of nrc.activeNodes
// that was not seen in this round is deleted as a peer and dropped from the map.
//
// The whole sync runs with nrc.mu held.
func (nrc *NetworkRoutingController) syncInternalPeers() {
	nrc.mu.Lock()
	defer nrc.mu.Unlock()

	start := time.Now()
	defer func() {
		endTime := time.Since(start)
		if nrc.MetricsEnabled {
			metrics.ControllerBGPInternalPeersSyncTime.Observe(endTime.Seconds())
		}
		klog.V(2).Infof("Syncing BGP peers for the node took %v", endTime)
	}()

	// get the current list of the nodes from API server
	nodes := nrc.nodeLister.List()

	if nrc.MetricsEnabled {
		metrics.ControllerBPGpeers.Set(float64(len(nodes)))
	}

	// Looked up once instead of once per node.
	// NOTE(review): assumes our own primary IP is stable for the duration of one sync - confirm.
	localIP := nrc.krNode.GetPrimaryNodeIP()
	localIPStr := localIP.String()
	sourceNodeIsIPv4 := localIP.To4() != nil

	// establish peer and add Pod CIDRs with current set of nodes; the set of peer IPs seen in this
	// round is kept so that stale entries of nrc.activeNodes can be found with a single lookup each
	currentNodes := make(map[string]struct{}, len(nodes))
	for _, obj := range nodes {
		node := obj.(*v1core.Node)
		targetNode, err := utils.NewRemoteKRNode(node)
		if err != nil {
			klog.Errorf("failed to create KRNode from node object: %v", err)
			continue
		}
		targetIP := targetNode.GetPrimaryNodeIP()
		targetIPStr := targetIP.String()

		// skip self
		if targetIP.Equal(localIP) {
			continue
		}

		// we are rr-client peer only with rr-server
		if nrc.bgpRRClient {
			if _, ok := node.Annotations[rrServerAnnotation]; !ok {
				continue
			}
		}

		// if node full mesh is not requested then just peer with nodes with same ASN
		// (run iBGP among same ASN peers)
		if !nrc.bgpFullMeshMode {
			nodeasn, ok := node.Annotations[nodeASNAnnotation]
			if !ok {
				klog.Infof("Not peering with the Node %s as ASN number of the node is unknown.",
					targetIPStr)
				continue
			}

			asnNo, err := strconv.ParseUint(nodeasn, 0, asnMaxBitSize)
			if err != nil {
				klog.Infof("Not peering with the Node %s as ASN number of the node is invalid.",
					targetIPStr)
				continue
			}

			// if the nodes ASN number is different from ASN number of current node skip peering
			if nrc.nodeAsnNumber != uint32(asnNo) {
				klog.Infof("Not peering with the Node %s as ASN number of the node is different.",
					targetIPStr)
				continue
			}
		}

		// only peer when both primary IPs belong to the same IP family
		targetNodeIsIPv4 := targetIP.To4() != nil
		if targetNodeIsIPv4 != sourceNodeIsIPv4 {
			klog.Warningf("Not peering with Node %s as it's primary IP (%s) uses a different protocol than "+
				"our primary IP (%s)", node.Name, targetIP, localIP)
			continue
		}

		currentNodes[targetIPStr] = struct{}{}
		nrc.activeNodes[targetIPStr] = true

		// explicitly set neighbors.transport.config.local-address with primaryIP which is configured
		// as their neighbor address at the remote peers.
		// this prevents the controller from initiating connection to its peers with a different IP address
		// when multiple L3 interfaces are active.
		n := &gobgpapi.Peer{
			Conf: &gobgpapi.PeerConf{
				NeighborAddress: targetIPStr,
				PeerAsn:         nrc.nodeAsnNumber,
			},
			Transport: &gobgpapi.Transport{
				LocalAddress: localIPStr,
				RemotePort:   nrc.bgpPort,
			},
		}

		if nrc.bgpGracefulRestart {
			n.GracefulRestart = &gobgpapi.GracefulRestart{
				Enabled:         true,
				RestartTime:     uint32(nrc.bgpGracefulRestartTime.Seconds()),
				DeferralTime:    uint32(nrc.bgpGracefulRestartDeferralTime.Seconds()),
				LocalRestarting: true,
			}

			// One address family entry per IP family the target node is capable of, IPv4 first and
			// then IPv6, each with multi-protocol graceful restart enabled.
			families := make([]*gobgpapi.Family, 0, 2)
			if targetNode.IsIPv4Capable() {
				families = append(families,
					&gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST})
			}
			if targetNode.IsIPv6Capable() {
				families = append(families,
					&gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST})
			}
			for _, family := range families {
				n.AfiSafis = append(n.AfiSafis, &gobgpapi.AfiSafi{
					Config: &gobgpapi.AfiSafiConfig{
						Family:  family,
						Enabled: true,
					},
					MpGracefulRestart: &gobgpapi.MpGracefulRestart{
						Config: &gobgpapi.MpGracefulRestartConfig{
							Enabled: true,
						},
						State: &gobgpapi.MpGracefulRestartState{},
					},
				})
			}
		}

		// we are rr-server peer with other rr-client with reflection enabled
		if nrc.bgpRRServer {
			if _, ok := node.Annotations[rrClientAnnotation]; ok {
				// add rr options with clusterId
				n.RouteReflector = &gobgpapi.RouteReflector{
					RouteReflectorClient:    true,
					RouteReflectorClusterId: fmt.Sprint(nrc.bgpClusterID),
				}
			}
		}

		// TODO: check if a node is already added as neighbor in a better way than add and catch error
		if err := nrc.bgpServer.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{
			Peer: n,
		}); err != nil {
			// an already existing peer is the expected case on every re-sync, so it is not logged
			if !strings.Contains(err.Error(), "can't overwrite the existing peer") {
				klog.Errorf("Failed to add node %s as peer due to %s", targetIP, err)
			}
		}
	}

	// delete the neighbor for every previously active node that is not part of the current set
	// (deleting map entries while ranging over the map is safe in Go)
	for ip := range nrc.activeNodes {
		if _, stillActive := currentNodes[ip]; stillActive {
			continue
		}
		if err := nrc.bgpServer.DeletePeer(context.Background(), &gobgpapi.DeletePeerRequest{Address: ip}); err != nil {
			klog.Errorf("Failed to remove node %s as peer due to %s", ip, err)
		}
		delete(nrc.activeNodes, ip)
	}
}
// connectToExternalBGPPeers adds all the configured eBGP peers (global or node specific) as neighbours
// on the given gobgp server.
//
// Each entry of peerNeighbors is modified in place before it is added: graceful restart settings are
// attached when bgpGracefulRestart is set (using bgpGracefulRestartTime and
// bgpGracefulRestartDeferralTime, both truncated to whole seconds), and eBGP multihop is enabled when
// peerMultihopTTL is greater than 1.
//
// A peer is skipped (logged, but not treated as an error) when its neighbor address or its local
// address cannot be parsed as an IP, or when the two addresses belong to different IP families.
//
// Returns an error as soon as the server refuses a peer; peers later in the list are then not
// attempted. Returns nil otherwise.
func (nrc *NetworkRoutingController) connectToExternalBGPPeers(server *gobgp.BgpServer, peerNeighbors []*gobgpapi.Peer,
	bgpGracefulRestart bool, bgpGracefulRestartDeferralTime time.Duration, bgpGracefulRestartTime time.Duration,
	peerMultihopTTL uint8) error {
	for _, n := range peerNeighbors {
		neighborIPStr := n.Conf.NeighborAddress
		neighborIP := net.ParseIP(neighborIPStr)
		if neighborIP == nil {
			klog.Errorf("unable to parse CIDR of global peer (%s), not peering with this peer",
				neighborIPStr)
			continue
		}

		peeringAddressForNeighbor := net.ParseIP(n.Transport.LocalAddress)
		if peeringAddressForNeighbor == nil {
			klog.Errorf("unable to parse our local address for peer (%s), not peering with this peer (%s)",
				n.Transport.LocalAddress, neighborIPStr)
			// Without this, a nil local address compares as "not IPv4" below, so an IPv6 neighbor
			// would pass the family check and be added with an invalid local address.
			continue
		}

		neighborIsIPv4 := neighborIP.To4() != nil
		peeringAddressIsIPv4 := peeringAddressForNeighbor.To4() != nil
		if neighborIsIPv4 != peeringAddressIsIPv4 {
			klog.Warningf("Not peering with configured peer as it's primary IP (%s) uses a different "+
				"protocol than our configured local-address (%s). Its possible that this can be resolved by setting "+
				"the local address appropriately", neighborIP, peeringAddressForNeighbor)
			continue
		}

		if bgpGracefulRestart {
			n.GracefulRestart = &gobgpapi.GracefulRestart{
				Enabled:         true,
				RestartTime:     uint32(bgpGracefulRestartTime.Seconds()),
				DeferralTime:    uint32(bgpGracefulRestartDeferralTime.Seconds()),
				LocalRestarting: true,
			}

			// The address families are chosen by what this node is capable of. The IPv4 branch
			// replaces any address families already present on the peer, the IPv6 branch appends.
			if nrc.krNode.IsIPv4Capable() {
				n.AfiSafis = []*gobgpapi.AfiSafi{
					{
						Config: &gobgpapi.AfiSafiConfig{
							Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST},
							Enabled: true,
						},
						MpGracefulRestart: &gobgpapi.MpGracefulRestart{
							Config: &gobgpapi.MpGracefulRestartConfig{
								Enabled: true,
							},
						},
					},
				}
			}
			if nrc.krNode.IsIPv6Capable() {
				afiSafi := gobgpapi.AfiSafi{
					Config: &gobgpapi.AfiSafiConfig{
						Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST},
						Enabled: true,
					},
					MpGracefulRestart: &gobgpapi.MpGracefulRestart{
						Config: &gobgpapi.MpGracefulRestartConfig{
							Enabled: true,
						},
					},
				}
				n.AfiSafis = append(n.AfiSafis, &afiSafi)
			}
		}

		if peerMultihopTTL > 1 {
			n.EbgpMultihop = &gobgpapi.EbgpMultihop{
				Enabled:     true,
				MultihopTtl: uint32(peerMultihopTTL),
			}
		}

		err := server.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{Peer: n})
		if err != nil {
			// %w keeps the message text unchanged while letting callers inspect the cause
			return fmt.Errorf("error peering with peer router "+
				"%q due to: %w", n.Conf.NeighborAddress, err)
		}

		klog.V(2).Infof("Successfully configured %s in ASN %v as BGP peer to the node",
			n.Conf.NeighborAddress, n.Conf.PeerAsn)
	}
	return nil
}
// newGlobalPeers validates the given peer router settings and returns one neighbor config per entry
// of ips.
//
// asns must contain exactly one entry per IP. ports, passwords and localips are optional: each of
// them must either be empty or contain exactly one entry per IP. An ASN is accepted only when it lies
// in one of the ranges 1-23455, 23457-63999, 64512-65534, 131072-4199999999 or 4200000000-4294967294.
//
// For every peer the remote port is options.DefaultBgpPort unless ports is given, and the local
// address is localAddress unless localips holds a non-blank entry for that peer. holdtime is converted
// to uint64 and used as the HoldTime of the peer's timers.
//
// Returns a nil slice and an error as soon as any validation fails.
func newGlobalPeers(ips []net.IP, ports []uint32, asns []uint32, passwords []string, localips []string,
	holdtime float64, localAddress string) ([]*gobgpapi.Peer, error) {
	peerCount := len(ips)

	// Validations: the optional lists may be left out entirely, but never partially filled
	if len(asns) != peerCount {
		return nil, errors.New("invalid peer router config, the number of IPs and ASN numbers must be equal")
	}
	if len(passwords) != 0 && len(passwords) != peerCount {
		return nil, errors.New("invalid peer router config. The number of passwords should either be zero, or " +
			"one per peer router. Use blank items if a router doesn't expect a password. Example: \"pass,,pass\" " +
			"OR [\"pass\",\"\",\"pass\"]")
	}
	if len(ports) != 0 && len(ports) != peerCount {
		return nil, fmt.Errorf("invalid peer router config. The number of ports should either be zero, or "+
			"one per peer router. If blank items are used, it will default to standard BGP port, %s. "+
			"Example: \"port,,port\" OR [\"port\",\"\",\"port\"]", strconv.Itoa(options.DefaultBgpPort))
	}
	if len(localips) != 0 && len(localips) != peerCount {
		return nil, fmt.Errorf("invalid peer router config. The number of localIPs should either be zero, or "+
			"one per peer router. If blank items are used, it will default to nodeIP, %s. "+
			"Example: \"10.1.1.1,,10.1.1.2\" OR [\"10.1.1.1\",\"\",\"10.1.1.2\"]", localAddress)
	}

	peers := make([]*gobgpapi.Peer, 0, peerCount)
	for i, ip := range ips {
		asn := asns[i]
		switch {
		case asn >= 1 && asn <= 23455:
		case asn >= 23457 && asn <= 63999:
		case asn >= 64512 && asn <= 65534:
		case asn >= 131072 && asn <= 4199999999:
		case asn >= 4200000000 && asn <= 4294967294:
		default:
			// everything outside of the ranges above is rejected
			return nil, fmt.Errorf("reserved ASN number \"%d\" for global BGP peer",
				asn)
		}

		// default to the standard BGP port unless a port list was supplied
		var remotePort uint32 = options.DefaultBgpPort
		if len(ports) != 0 {
			remotePort = ports[i]
		}

		// an absent password list leaves the password empty
		var authPassword string
		if len(passwords) != 0 {
			authPassword = passwords[i]
		}

		// explicitly set neighbors.transport.config.local-address with primaryIP which is configured
		// as their neighbor address at the remote peers.
		// this prevents the controller from initiating connection to its peers with a different IP address
		// when multiple L3 interfaces are active.
		// localAddress defaults to the node's primary IP, but can be overridden on a peer-by-peer basis
		// via the kube-router.io/peer.localips annotation: a set, non-blank localip wins.
		peerLocalAddress := localAddress
		if len(localips) != 0 && localips[i] != "" {
			peerLocalAddress = localips[i]
		}

		peers = append(peers, &gobgpapi.Peer{
			Conf: &gobgpapi.PeerConf{
				NeighborAddress: ip.String(),
				PeerAsn:         asn,
				AuthPassword:    authPassword,
			},
			Timers: &gobgpapi.Timers{Config: &gobgpapi.TimersConfig{HoldTime: uint64(holdtime)}},
			Transport: &gobgpapi.Transport{
				LocalAddress: peerLocalAddress,
				RemotePort:   remotePort,
			},
		})
	}

	return peers, nil
}
// newNodeEventHandler returns a cache.ResourceEventHandler for node objects. Node additions and
// deletions are logged and forwarded to nrc.OnNodeUpdate; node updates are ignored.
func (nrc *NetworkRoutingController) newNodeEventHandler() cache.ResourceEventHandler {
	onAdd := func(obj interface{}) {
		addedNode, err := utils.NewRemoteKRNode(obj.(*v1core.Node))
		if err != nil {
			klog.Errorf("failed to create KRNode from node object: %v", err)
			return
		}

		klog.V(2).Infof("Received node %s added update from watch API so peer with new node",
			addedNode.GetPrimaryNodeIP())
		nrc.OnNodeUpdate(obj)
	}

	onUpdate := func(oldObj, newObj interface{}) {
		// we are only interested in node add/delete, so skip update
	}

	onDelete := func(obj interface{}) {
		// A delete notification carries either the node itself or a tombstone wrapping it
		var node *v1core.Node
		switch deleted := obj.(type) {
		case *v1core.Node:
			node = deleted
		case cache.DeletedFinalStateUnknown:
			wrapped, isNode := deleted.Obj.(*v1core.Node)
			if !isNode {
				klog.Errorf("unexpected object type: %v", obj)
				return
			}
			node = wrapped
		default:
			klog.Errorf("unexpected object type: %v", obj)
			return
		}

		// In this case even if we can't get the NodeIP that's alright as the node is being removed anyway and
		// future node lister operations that happen in OnNodeUpdate won't be affected as the node won't be returned
		removedNode, err := utils.NewRemoteKRNode(node)
		if err != nil || removedNode == nil {
			klog.Infof("Received node (IP unavailable) removed update from watch API, so remove node " +
				"from peer")
		} else {
			klog.Infof("Received node %s removed update from watch API, so remove node from peer",
				removedNode.GetPrimaryNodeIP())
		}
		nrc.OnNodeUpdate(obj)
	}

	return cache.ResourceEventHandlerFuncs{
		AddFunc:    onAdd,
		UpdateFunc: onUpdate,
		DeleteFunc: onDelete,
	}
}
// OnNodeUpdate handles updates from the Node watcher, which calls this method whenever a new node is
// added or an old node is deleted, so that we peer up with the new node and drop peering from the old
// node. The passed object is not used. Nothing is done while the BGP server has not been started.
func (nrc *NetworkRoutingController) OnNodeUpdate(_ interface{}) {
	if !nrc.bgpServerStarted {
		return
	}

	// update export policies so that NeighborSet gets updated with new set of nodes; a failure is
	// logged and the remaining steps still run
	if err := nrc.AddPolicies(); err != nil {
		klog.Errorf("Error adding BGP policies: %s", err.Error())
	}

	if nrc.bgpEnableInternal {
		nrc.syncInternalPeers()
	}

	// skip if first round of disableSourceDestinationCheck() is not done yet, this is to prevent
	// all the nodes for all the node add update trying to perform disableSourceDestinationCheck
	srcDstCheckReady := nrc.disableSrcDstCheck && nrc.initSrcDstCheckDone && nrc.ec2IamAuthorized
	if srcDstCheckReady {
		nrc.disableSourceDestinationCheck()
	}
}