mirror of
https://github.com/cloudnativelabs/kube-router.git
synced 2025-09-27 11:01:11 +02:00
This prepares the way for broader refactors in the way that we handle nodes by: * Separating frequently used node logic from the controller creation steps * Keeping reused code DRY-er * Adding interface abstractions for key groups of node data and starting to rely on those more rather than concrete types * Separating node data from the rest of the controller data structure so that it smaller definitions of data can be passed around to functions that need it rather than always passing the entire controller which contains more data / surface area than most functions need.
432 lines
15 KiB
Go
432 lines
15 KiB
Go
package routing
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
|
|
"github.com/cloudnativelabs/kube-router/v2/pkg/options"
|
|
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
|
|
gobgpapi "github.com/osrg/gobgp/v3/api"
|
|
gobgp "github.com/osrg/gobgp/v3/pkg/server"
|
|
v1core "k8s.io/api/core/v1"
|
|
"k8s.io/client-go/tools/cache"
|
|
"k8s.io/klog/v2"
|
|
)
|
|
|
|
// Refresh the peer relationship with rest of the nodes in the cluster (iBGP peers). Node add/remove
|
|
// events should ensure peer relationship with only currently active nodes. In case
|
|
// we miss any events from API server this method which is called periodically
|
|
// ensures peer relationship with removed nodes is deleted.
|
|
func (nrc *NetworkRoutingController) syncInternalPeers() {
|
|
nrc.mu.Lock()
|
|
defer nrc.mu.Unlock()
|
|
|
|
start := time.Now()
|
|
defer func() {
|
|
endTime := time.Since(start)
|
|
if nrc.MetricsEnabled {
|
|
metrics.ControllerBGPInternalPeersSyncTime.Observe(endTime.Seconds())
|
|
}
|
|
klog.V(2).Infof("Syncing BGP peers for the node took %v", endTime)
|
|
}()
|
|
|
|
// get the current list of the nodes from API server
|
|
nodes := nrc.nodeLister.List()
|
|
|
|
if nrc.MetricsEnabled {
|
|
metrics.ControllerBPGpeers.Set(float64(len(nodes)))
|
|
}
|
|
// establish peer and add Pod CIDRs with current set of nodes
|
|
currentNodes := make([]string, 0)
|
|
for _, obj := range nodes {
|
|
node := obj.(*v1core.Node)
|
|
targetNode, err := utils.NewRemoteKRNode(node)
|
|
if err != nil {
|
|
klog.Errorf("failed to create KRNode from node object: %v", err)
|
|
continue
|
|
}
|
|
|
|
// skip self
|
|
if targetNode.GetPrimaryNodeIP().Equal(nrc.krNode.GetPrimaryNodeIP()) {
|
|
continue
|
|
}
|
|
|
|
// we are rr-client peer only with rr-server
|
|
if nrc.bgpRRClient {
|
|
if _, ok := node.ObjectMeta.Annotations[rrServerAnnotation]; !ok {
|
|
continue
|
|
}
|
|
}
|
|
|
|
// if node full mesh is not requested then just peer with nodes with same ASN
|
|
// (run iBGP among same ASN peers)
|
|
if !nrc.bgpFullMeshMode {
|
|
nodeasn, ok := node.ObjectMeta.Annotations[nodeASNAnnotation]
|
|
if !ok {
|
|
klog.Infof("Not peering with the Node %s as ASN number of the node is unknown.",
|
|
targetNode.GetPrimaryNodeIP().String())
|
|
continue
|
|
}
|
|
|
|
asnNo, err := strconv.ParseUint(nodeasn, 0, asnMaxBitSize)
|
|
if err != nil {
|
|
klog.Infof("Not peering with the Node %s as ASN number of the node is invalid.",
|
|
targetNode.GetPrimaryNodeIP().String())
|
|
continue
|
|
}
|
|
|
|
// if the nodes ASN number is different from ASN number of current node skip peering
|
|
if nrc.nodeAsnNumber != uint32(asnNo) {
|
|
klog.Infof("Not peering with the Node %s as ASN number of the node is different.",
|
|
targetNode.GetPrimaryNodeIP().String())
|
|
continue
|
|
}
|
|
}
|
|
|
|
targetNodeIsIPv4 := targetNode.GetPrimaryNodeIP().To4() != nil
|
|
sourceNodeIsIPv4 := nrc.krNode.GetPrimaryNodeIP().To4() != nil
|
|
|
|
if targetNodeIsIPv4 != sourceNodeIsIPv4 {
|
|
klog.Warningf("Not peering with Node %s as it's primary IP (%s) uses a different protocol than "+
|
|
"our primary IP (%s)", node.ObjectMeta.Name, targetNode.GetPrimaryNodeIP(),
|
|
nrc.krNode.GetPrimaryNodeIP())
|
|
continue
|
|
}
|
|
|
|
currentNodes = append(currentNodes, targetNode.GetPrimaryNodeIP().String())
|
|
nrc.activeNodes[targetNode.GetPrimaryNodeIP().String()] = true
|
|
// explicitly set neighbors.transport.config.local-address with primaryIP which is configured
|
|
// as their neighbor address at the remote peers.
|
|
// this prevents the controller from initiating connection to its peers with a different IP address
|
|
// when multiple L3 interfaces are active.
|
|
n := &gobgpapi.Peer{
|
|
Conf: &gobgpapi.PeerConf{
|
|
NeighborAddress: targetNode.GetPrimaryNodeIP().String(),
|
|
PeerAsn: nrc.nodeAsnNumber,
|
|
},
|
|
Transport: &gobgpapi.Transport{
|
|
LocalAddress: nrc.krNode.GetPrimaryNodeIP().String(),
|
|
RemotePort: nrc.bgpPort,
|
|
},
|
|
}
|
|
|
|
if nrc.bgpGracefulRestart {
|
|
n.GracefulRestart = &gobgpapi.GracefulRestart{
|
|
Enabled: true,
|
|
RestartTime: uint32(nrc.bgpGracefulRestartTime.Seconds()),
|
|
DeferralTime: uint32(nrc.bgpGracefulRestartDeferralTime.Seconds()),
|
|
LocalRestarting: true,
|
|
}
|
|
|
|
// We choose to only peer using the protocol of the node's primary IP
|
|
if targetNode.IsIPv4Capable() {
|
|
afiSafi := gobgpapi.AfiSafi{
|
|
Config: &gobgpapi.AfiSafiConfig{
|
|
Family: &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST},
|
|
Enabled: true,
|
|
},
|
|
MpGracefulRestart: &gobgpapi.MpGracefulRestart{
|
|
Config: &gobgpapi.MpGracefulRestartConfig{
|
|
Enabled: true,
|
|
},
|
|
State: &gobgpapi.MpGracefulRestartState{},
|
|
},
|
|
}
|
|
n.AfiSafis = append(n.AfiSafis, &afiSafi)
|
|
}
|
|
if targetNode.IsIPv6Capable() {
|
|
afiSafi := gobgpapi.AfiSafi{
|
|
Config: &gobgpapi.AfiSafiConfig{
|
|
Family: &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST},
|
|
Enabled: true,
|
|
},
|
|
MpGracefulRestart: &gobgpapi.MpGracefulRestart{
|
|
Config: &gobgpapi.MpGracefulRestartConfig{
|
|
Enabled: true,
|
|
},
|
|
State: &gobgpapi.MpGracefulRestartState{},
|
|
},
|
|
}
|
|
n.AfiSafis = append(n.AfiSafis, &afiSafi)
|
|
}
|
|
}
|
|
|
|
// we are rr-server peer with other rr-client with reflection enabled
|
|
if nrc.bgpRRServer {
|
|
if _, ok := node.ObjectMeta.Annotations[rrClientAnnotation]; ok {
|
|
// add rr options with clusterId
|
|
n.RouteReflector = &gobgpapi.RouteReflector{
|
|
RouteReflectorClient: true,
|
|
RouteReflectorClusterId: fmt.Sprint(nrc.bgpClusterID),
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: check if a node is already added as neighbor in a better way than add and catch error
|
|
if err := nrc.bgpServer.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{
|
|
Peer: n,
|
|
}); err != nil {
|
|
if !strings.Contains(err.Error(), "can't overwrite the existing peer") {
|
|
klog.Errorf("Failed to add node %s as peer due to %s", targetNode.GetPrimaryNodeIP(), err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// find the list of the node removed, from the last known list of active nodes
|
|
removedNodes := make([]string, 0)
|
|
for ip := range nrc.activeNodes {
|
|
stillActive := false
|
|
for _, node := range currentNodes {
|
|
if ip == node {
|
|
stillActive = true
|
|
break
|
|
}
|
|
}
|
|
if !stillActive {
|
|
removedNodes = append(removedNodes, ip)
|
|
}
|
|
}
|
|
|
|
// delete the neighbor for the nodes that are removed
|
|
for _, ip := range removedNodes {
|
|
if err := nrc.bgpServer.DeletePeer(context.Background(), &gobgpapi.DeletePeerRequest{Address: ip}); err != nil {
|
|
klog.Errorf("Failed to remove node %s as peer due to %s", ip, err)
|
|
}
|
|
delete(nrc.activeNodes, ip)
|
|
}
|
|
}
|
|
|
|
// connectToExternalBGPPeers adds all the configured eBGP peers (global or node specific) as neighbours
|
|
func (nrc *NetworkRoutingController) connectToExternalBGPPeers(server *gobgp.BgpServer, peerNeighbors []*gobgpapi.Peer,
|
|
bgpGracefulRestart bool, bgpGracefulRestartDeferralTime time.Duration, bgpGracefulRestartTime time.Duration,
|
|
peerMultihopTTL uint8) error {
|
|
for _, n := range peerNeighbors {
|
|
neighborIPStr := n.Conf.NeighborAddress
|
|
neighborIP := net.ParseIP(neighborIPStr)
|
|
if neighborIP == nil {
|
|
klog.Errorf("unable to parse CIDR of global peer (%s), not peering with this peer",
|
|
neighborIPStr)
|
|
continue
|
|
}
|
|
peeringAddressForNeighbor := net.ParseIP(n.Transport.LocalAddress)
|
|
if peeringAddressForNeighbor == nil {
|
|
klog.Errorf("unable to parse our local address for peer (%s), not peering with this peer (%s)",
|
|
n.Transport.LocalAddress, neighborIPStr)
|
|
}
|
|
|
|
neighborIsIPv4 := neighborIP.To4() != nil
|
|
peeringAddressIsIPv4 := peeringAddressForNeighbor.To4() != nil
|
|
if neighborIsIPv4 != peeringAddressIsIPv4 {
|
|
klog.Warningf("Not peering with configured peer as it's primary IP (%s) uses a different "+
|
|
"protocol than our configured local-address (%s). Its possible that this can be resolved by setting "+
|
|
"the local address appropriately", neighborIP, peeringAddressForNeighbor)
|
|
continue
|
|
}
|
|
|
|
if bgpGracefulRestart {
|
|
n.GracefulRestart = &gobgpapi.GracefulRestart{
|
|
Enabled: true,
|
|
RestartTime: uint32(bgpGracefulRestartTime.Seconds()),
|
|
DeferralTime: uint32(bgpGracefulRestartDeferralTime.Seconds()),
|
|
LocalRestarting: true,
|
|
}
|
|
|
|
if nrc.krNode.IsIPv4Capable() {
|
|
n.AfiSafis = []*gobgpapi.AfiSafi{
|
|
{
|
|
Config: &gobgpapi.AfiSafiConfig{
|
|
Family: &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST},
|
|
Enabled: true,
|
|
},
|
|
MpGracefulRestart: &gobgpapi.MpGracefulRestart{
|
|
Config: &gobgpapi.MpGracefulRestartConfig{
|
|
Enabled: true,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
if nrc.krNode.IsIPv6Capable() {
|
|
afiSafi := gobgpapi.AfiSafi{
|
|
Config: &gobgpapi.AfiSafiConfig{
|
|
Family: &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST},
|
|
Enabled: true,
|
|
},
|
|
MpGracefulRestart: &gobgpapi.MpGracefulRestart{
|
|
Config: &gobgpapi.MpGracefulRestartConfig{
|
|
Enabled: true,
|
|
},
|
|
},
|
|
}
|
|
n.AfiSafis = append(n.AfiSafis, &afiSafi)
|
|
}
|
|
}
|
|
if peerMultihopTTL > 1 {
|
|
n.EbgpMultihop = &gobgpapi.EbgpMultihop{
|
|
Enabled: true,
|
|
MultihopTtl: uint32(peerMultihopTTL),
|
|
}
|
|
}
|
|
err := server.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{Peer: n})
|
|
if err != nil {
|
|
return fmt.Errorf("error peering with peer router "+
|
|
"%q due to: %s", n.Conf.NeighborAddress, err)
|
|
}
|
|
klog.V(2).Infof("Successfully configured %s in ASN %v as BGP peer to the node",
|
|
n.Conf.NeighborAddress, n.Conf.PeerAsn)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Does validation and returns neighbor configs
|
|
func newGlobalPeers(ips []net.IP, ports []uint32, asns []uint32, passwords []string, localips []string,
|
|
holdtime float64, localAddress string) ([]*gobgpapi.Peer, error) {
|
|
peers := make([]*gobgpapi.Peer, 0)
|
|
|
|
// Validations
|
|
if len(ips) != len(asns) {
|
|
return nil, errors.New("invalid peer router config, the number of IPs and ASN numbers must be equal")
|
|
}
|
|
|
|
if len(ips) != len(passwords) && len(passwords) != 0 {
|
|
return nil, errors.New("invalid peer router config. The number of passwords should either be zero, or " +
|
|
"one per peer router. Use blank items if a router doesn't expect a password. Example: \"pass,,pass\" " +
|
|
"OR [\"pass\",\"\",\"pass\"]")
|
|
}
|
|
|
|
if len(ips) != len(ports) && len(ports) != 0 {
|
|
return nil, fmt.Errorf("invalid peer router config. The number of ports should either be zero, or "+
|
|
"one per peer router. If blank items are used, it will default to standard BGP port, %s. "+
|
|
"Example: \"port,,port\" OR [\"port\",\"\",\"port\"]", strconv.Itoa(options.DefaultBgpPort))
|
|
}
|
|
|
|
if len(ips) != len(localips) && len(localips) != 0 {
|
|
return nil, fmt.Errorf("invalid peer router config. The number of localIPs should either be zero, or "+
|
|
"one per peer router. If blank items are used, it will default to nodeIP, %s. "+
|
|
"Example: \"10.1.1.1,,10.1.1.2\" OR [\"10.1.1.1\",\"\",\"10.1.1.2\"]", localAddress)
|
|
}
|
|
|
|
for i := 0; i < len(ips); i++ {
|
|
if !((asns[i] >= 1 && asns[i] <= 23455) ||
|
|
(asns[i] >= 23457 && asns[i] <= 63999) ||
|
|
(asns[i] >= 64512 && asns[i] <= 65534) ||
|
|
(asns[i] >= 131072 && asns[i] <= 4199999999) ||
|
|
(asns[i] >= 4200000000 && asns[i] <= 4294967294)) {
|
|
return nil, fmt.Errorf("reserved ASN number \"%d\" for global BGP peer",
|
|
asns[i])
|
|
}
|
|
|
|
// explicitly set neighbors.transport.config.local-address with primaryIP which is configured
|
|
// as their neighbor address at the remote peers.
|
|
// this prevents the controller from initiating connection to its peers with a different IP address
|
|
// when multiple L3 interfaces are active.
|
|
peer := &gobgpapi.Peer{
|
|
Conf: &gobgpapi.PeerConf{
|
|
NeighborAddress: ips[i].String(),
|
|
PeerAsn: asns[i],
|
|
},
|
|
Timers: &gobgpapi.Timers{Config: &gobgpapi.TimersConfig{HoldTime: uint64(holdtime)}},
|
|
Transport: &gobgpapi.Transport{
|
|
// localAddress defaults to the node's primary IP, but can be overridden below on a peer-by-peer basis
|
|
// below via the kube-router.io/peer.localips annotation
|
|
LocalAddress: localAddress,
|
|
RemotePort: options.DefaultBgpPort,
|
|
},
|
|
}
|
|
|
|
if len(ports) != 0 {
|
|
peer.Transport.RemotePort = ports[i]
|
|
}
|
|
|
|
if len(passwords) != 0 {
|
|
peer.Conf.AuthPassword = passwords[i]
|
|
}
|
|
|
|
// if localip is set and is non-blank for BGP configuration override primaryIP choice set for peer above
|
|
if len(localips) != 0 && localips[i] != "" {
|
|
peer.Transport.LocalAddress = localips[i]
|
|
}
|
|
|
|
peers = append(peers, peer)
|
|
}
|
|
|
|
return peers, nil
|
|
}
|
|
|
|
func (nrc *NetworkRoutingController) newNodeEventHandler() cache.ResourceEventHandler {
|
|
return cache.ResourceEventHandlerFuncs{
|
|
AddFunc: func(obj interface{}) {
|
|
node := obj.(*v1core.Node)
|
|
targetNode, err := utils.NewRemoteKRNode(node)
|
|
if err != nil {
|
|
klog.Errorf("failed to create KRNode from node object: %v", err)
|
|
return
|
|
}
|
|
|
|
klog.V(2).Infof("Received node %s added update from watch API so peer with new node",
|
|
targetNode.GetPrimaryNodeIP())
|
|
nrc.OnNodeUpdate(obj)
|
|
},
|
|
UpdateFunc: func(oldObj, newObj interface{}) {
|
|
// we are only interested in node add/delete, so skip update
|
|
},
|
|
DeleteFunc: func(obj interface{}) {
|
|
node, ok := obj.(*v1core.Node)
|
|
if !ok {
|
|
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
|
|
if !ok {
|
|
klog.Errorf("unexpected object type: %v", obj)
|
|
return
|
|
}
|
|
if node, ok = tombstone.Obj.(*v1core.Node); !ok {
|
|
klog.Errorf("unexpected object type: %v", obj)
|
|
return
|
|
}
|
|
}
|
|
targetNode, err := utils.NewRemoteKRNode(node)
|
|
// In this case even if we can't get the NodeIP that's alright as the node is being removed anyway and
|
|
// future node lister operations that happen in OnNodeUpdate won't be affected as the node won't be returned
|
|
if err == nil && targetNode != nil {
|
|
klog.Infof("Received node %s removed update from watch API, so remove node from peer",
|
|
targetNode.GetPrimaryNodeIP())
|
|
} else {
|
|
klog.Infof("Received node (IP unavailable) removed update from watch API, so remove node " +
|
|
"from peer")
|
|
}
|
|
|
|
nrc.OnNodeUpdate(obj)
|
|
},
|
|
}
|
|
}
|
|
|
|
// OnNodeUpdate Handle updates from Node watcher. Node watcher calls this method whenever there is
|
|
// new node is added or old node is deleted. So peer up with new node and drop peering
|
|
// from old node
|
|
func (nrc *NetworkRoutingController) OnNodeUpdate(_ interface{}) {
|
|
if !nrc.bgpServerStarted {
|
|
return
|
|
}
|
|
|
|
// update export policies so that NeighborSet gets updated with new set of nodes
|
|
err := nrc.AddPolicies()
|
|
if err != nil {
|
|
klog.Errorf("Error adding BGP policies: %s", err.Error())
|
|
}
|
|
|
|
if nrc.bgpEnableInternal {
|
|
nrc.syncInternalPeers()
|
|
}
|
|
|
|
// skip if first round of disableSourceDestinationCheck() is not done yet, this is to prevent
|
|
// all the nodes for all the node add update trying to perfrom disableSourceDestinationCheck
|
|
if nrc.disableSrcDstCheck && nrc.initSrcDstCheckDone && nrc.ec2IamAuthorized {
|
|
nrc.disableSourceDestinationCheck()
|
|
}
|
|
}
|