667 lines
20 KiB
Go

package routing
import (
"context"
"errors"
"fmt"
"reflect"
"strconv"
"strings"
gobgpapi "github.com/osrg/gobgp/api"
v1core "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"github.com/cloudnativelabs/kube-router/pkg/utils"
)
// AddPolicies sets up the BGP defined sets and then the export and import policies that refer to them.
//
// Nothing is configured when nrc.bgpRRServer is set (this node is a route reflector server). A failure
// to create any of the defined sets is only logged, whereas a failure to add the export or the import
// policy is returned to the caller.
func (nrc *NetworkRoutingController) AddPolicies() error {
	// route reflector servers get no policies at all
	if nrc.bgpRRServer {
		return nil
	}

	// Defined sets first: the policies added further down refer to them by name.
	if setErr := nrc.addPodCidrDefinedSet(); setErr != nil {
		klog.Errorf("Failed to add `podcidrdefinedset` defined set: %s", setErr)
	}
	if setErr := nrc.addServiceVIPsDefinedSet(); setErr != nil {
		klog.Errorf("Failed to add `servicevipsdefinedset` defined set: %s", setErr)
	}
	if setErr := nrc.addDefaultRouteDefinedSet(); setErr != nil {
		klog.Errorf("Failed to add `defaultroutedefinedset` defined set: %s", setErr)
	}

	internalPeers, setErr := nrc.addiBGPPeersDefinedSet()
	if setErr != nil {
		klog.Errorf("Failed to add `iBGPpeerset` defined set: %s", setErr)
	}
	externalPeers, setErr := nrc.addExternalBGPPeersDefinedSet()
	if setErr != nil {
		klog.Errorf("Failed to add `externalpeerset` defined set: %s", setErr)
	}
	if setErr = nrc.addAllBGPPeersDefinedSet(internalPeers, externalPeers); setErr != nil {
		klog.Errorf("Failed to add `allpeerset` defined set: %s", setErr)
	}

	// Policy failures, unlike defined set failures, abort the call.
	if policyErr := nrc.addExportPolicies(); policyErr != nil {
		return policyErr
	}
	return nrc.addImportPolicies()
}
// addPodCidrDefinedSet creates the `podcidrdefinedset` prefix defined set, which represents just the
// pod CIDR associated with the node. A defined set of that name that already exists is left untouched.
//
// The mask length is taken from the text after the "/" in nrc.podCidr and has to be an integer in the
// range [0, 32]. An error is returned when the pod CIDR does not have that shape, or when listing or
// adding the defined set fails.
func (nrc *NetworkRoutingController) addPodCidrDefinedSet() error {
	var currentDefinedSet *gobgpapi.DefinedSet
	err := nrc.bgpServer.ListDefinedSet(context.Background(),
		&gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_PREFIX, Name: "podcidrdefinedset"},
		func(ds *gobgpapi.DefinedSet) {
			currentDefinedSet = ds
		})
	if err != nil {
		return err
	}
	if currentDefinedSet != nil {
		return nil
	}

	// Check the shape before indexing: a pod CIDR without a "/" must yield an error, not a panic.
	cidrParts := strings.Split(nrc.podCidr, "/")
	if len(cidrParts) != 2 {
		return fmt.Errorf("the pod CIDR given is not in <address>/<mask> form: %q", nrc.podCidr)
	}
	cidrLen, err := strconv.Atoi(cidrParts[1])
	if err != nil {
		// report the offending text; cidrLen is just the zero value when parsing fails
		return fmt.Errorf("the pod CIDR IP given is not a proper mask: %q", cidrParts[1])
	}
	if cidrLen < 0 || cidrLen > 32 {
		return fmt.Errorf("the pod CIDR IP given is not a proper mask: %d", cidrLen)
	}

	podCidrDefinedSet := &gobgpapi.DefinedSet{
		DefinedType: gobgpapi.DefinedType_PREFIX,
		Name:        "podcidrdefinedset",
		Prefixes: []*gobgpapi.Prefix{
			{
				IpPrefix: nrc.podCidr,
				// min == max: only the exact pod CIDR matches
				MaskLengthMin: uint32(cidrLen),
				MaskLengthMax: uint32(cidrLen),
			},
		},
	}
	return nrc.bgpServer.AddDefinedSet(context.Background(),
		&gobgpapi.AddDefinedSetRequest{DefinedSet: podCidrDefinedSet})
}
// addServiceVIPsDefinedSet keeps the `servicevipsdefinedset` prefix defined set in line with the
// advertisable service IPs, i.e. the first value returned by nrc.getAllVIPs (its other two return
// values are ignored). Every IP is represented as a /32 prefix.
//
// The defined set is created when it does not exist yet. When it exists and differs from the wanted
// list, prefixes missing from it are added and prefixes no longer wanted are deleted; prefixes are
// matched by their IpPrefix string.
func (nrc *NetworkRoutingController) addServiceVIPsDefinedSet() error {
	const setName = "servicevipsdefinedset"
	ctx := context.Background()

	var existing *gobgpapi.DefinedSet
	capture := func(ds *gobgpapi.DefinedSet) { existing = ds }
	listReq := &gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_PREFIX, Name: setName}
	if err := nrc.bgpServer.ListDefinedSet(ctx, listReq, capture); err != nil {
		return err
	}

	wanted := []*gobgpapi.Prefix{}
	vips, _, _ := nrc.getAllVIPs()
	for _, vip := range vips {
		wanted = append(wanted, &gobgpapi.Prefix{IpPrefix: vip + "/32", MaskLengthMin: 32, MaskLengthMax: 32})
	}

	// helper building a defined set of this name out of the given prefixes
	asDefinedSet := func(prefixes []*gobgpapi.Prefix) *gobgpapi.DefinedSet {
		return &gobgpapi.DefinedSet{
			DefinedType: gobgpapi.DefinedType_PREFIX,
			Name:        setName,
			Prefixes:    prefixes,
		}
	}

	if existing == nil {
		return nrc.bgpServer.AddDefinedSet(ctx, &gobgpapi.AddDefinedSetRequest{DefinedSet: asDefinedSet(wanted)})
	}
	if reflect.DeepEqual(wanted, existing.Prefixes) {
		return nil
	}

	// reports whether list holds an entry with the same IpPrefix as candidate
	holds := func(list []*gobgpapi.Prefix, candidate *gobgpapi.Prefix) bool {
		for _, entry := range list {
			if entry.IpPrefix == candidate.IpPrefix {
				return true
			}
		}
		return false
	}

	missing := []*gobgpapi.Prefix{}
	for _, candidate := range wanted {
		if !holds(existing.Prefixes, candidate) {
			missing = append(missing, candidate)
		}
	}
	stale := []*gobgpapi.Prefix{}
	for _, candidate := range existing.Prefixes {
		if !holds(wanted, candidate) {
			stale = append(stale, candidate)
		}
	}

	// add what is missing first, then remove what is stale
	addReq := &gobgpapi.AddDefinedSetRequest{DefinedSet: asDefinedSet(missing)}
	if err := nrc.bgpServer.AddDefinedSet(ctx, addReq); err != nil {
		return err
	}
	// All: false removes just the listed prefixes rather than the defined set itself
	delReq := &gobgpapi.DeleteDefinedSetRequest{DefinedSet: asDefinedSet(stale), All: false}
	return nrc.bgpServer.DeleteDefinedSet(ctx, delReq)
}
// addDefaultRouteDefinedSet creates the `defaultroutedefinedset` prefix defined set, which holds only
// the default route 0.0.0.0/0 with a mask length of exactly 0. Nothing is changed when a defined set
// of that name already exists.
func (nrc *NetworkRoutingController) addDefaultRouteDefinedSet() error {
	const (
		setName                    = "defaultroutedefinedset"
		defaultRouteMaskLen uint32 = 0
	)
	ctx := context.Background()

	var existing *gobgpapi.DefinedSet
	capture := func(ds *gobgpapi.DefinedSet) { existing = ds }
	listReq := &gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_PREFIX, Name: setName}
	if err := nrc.bgpServer.ListDefinedSet(ctx, listReq, capture); err != nil {
		return err
	}
	if existing != nil {
		return nil
	}

	defaultRoute := &gobgpapi.Prefix{
		IpPrefix:      "0.0.0.0/0",
		MaskLengthMin: defaultRouteMaskLen,
		MaskLengthMax: defaultRouteMaskLen,
	}
	created := &gobgpapi.DefinedSet{
		DefinedType: gobgpapi.DefinedType_PREFIX,
		Name:        setName,
		Prefixes:    []*gobgpapi.Prefix{defaultRoute},
	}
	return nrc.bgpServer.AddDefinedSet(ctx, &gobgpapi.AddDefinedSetRequest{DefinedSet: created})
}
// addiBGPPeersDefinedSet keeps the `iBGPpeerset` neighbor defined set in line with the nodes known to
// the local node cache and returns the wanted peer list, one "<node IP>/32" entry per node.
//
// When nrc.bgpEnableInternal is false an empty list is returned and nothing is configured. Nodes for
// which no IP can be determined are logged and skipped. The defined set is created when it does not
// exist; otherwise missing entries are added and entries no longer wanted are deleted. The peer list
// is returned alongside any error.
func (nrc *NetworkRoutingController) addiBGPPeersDefinedSet() ([]string, error) {
	const setName = "iBGPpeerset"

	peerCIDRs := []string{}
	if !nrc.bgpEnableInternal {
		return peerCIDRs, nil
	}

	// The wanted peers are derived from the current list of nodes in the local cache.
	for _, cached := range nrc.nodeLister.List() {
		ip, ipErr := utils.GetNodeIP(cached.(*v1core.Node))
		if ipErr != nil {
			klog.Errorf("Failed to find a node IP and therefore cannot add internal BGP Peer: %v", ipErr)
			continue
		}
		peerCIDRs = append(peerCIDRs, ip.String()+"/32")
	}

	ctx := context.Background()
	var existing *gobgpapi.DefinedSet
	capture := func(ds *gobgpapi.DefinedSet) { existing = ds }
	listReq := &gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_NEIGHBOR, Name: setName}
	if err := nrc.bgpServer.ListDefinedSet(ctx, listReq, capture); err != nil {
		return peerCIDRs, err
	}

	// helper building a neighbor defined set of this name out of the given entries
	asDefinedSet := func(entries []string) *gobgpapi.DefinedSet {
		return &gobgpapi.DefinedSet{
			DefinedType: gobgpapi.DefinedType_NEIGHBOR,
			Name:        setName,
			List:        entries,
		}
	}

	if existing == nil {
		addReq := &gobgpapi.AddDefinedSetRequest{DefinedSet: asDefinedSet(peerCIDRs)}
		return peerCIDRs, nrc.bgpServer.AddDefinedSet(ctx, addReq)
	}
	if reflect.DeepEqual(peerCIDRs, existing.List) {
		return peerCIDRs, nil
	}

	holds := func(list []string, candidate string) bool {
		for _, entry := range list {
			if entry == candidate {
				return true
			}
		}
		return false
	}

	missing := []string{}
	for _, candidate := range peerCIDRs {
		if !holds(existing.List, candidate) {
			missing = append(missing, candidate)
		}
	}
	stale := []string{}
	for _, candidate := range existing.List {
		if !holds(peerCIDRs, candidate) {
			stale = append(stale, candidate)
		}
	}

	// add what is missing first, then remove what is stale
	addReq := &gobgpapi.AddDefinedSetRequest{DefinedSet: asDefinedSet(missing)}
	if err := nrc.bgpServer.AddDefinedSet(ctx, addReq); err != nil {
		return peerCIDRs, err
	}
	// All: false removes just the listed entries rather than the defined set itself
	delReq := &gobgpapi.DeleteDefinedSetRequest{DefinedSet: asDefinedSet(stale), All: false}
	if err := nrc.bgpServer.DeleteDefinedSet(ctx, delReq); err != nil {
		return peerCIDRs, err
	}
	return peerCIDRs, nil
}
// addExternalBGPPeersDefinedSet creates the `externalpeerset` neighbor defined set and returns the
// external peer list: the neighbor address of every entry in nrc.globalPeerRouters followed by every
// entry of nrc.nodePeerRouters, each suffixed with "/32".
//
// Nothing is created when there are no external peers. A defined set of that name that already exists
// is not modified; the peer list is still returned in that case.
func (nrc *NetworkRoutingController) addExternalBGPPeersDefinedSet() ([]string, error) {
	const setName = "externalpeerset"
	ctx := context.Background()
	peerCIDRs := []string{}

	var existing *gobgpapi.DefinedSet
	capture := func(ds *gobgpapi.DefinedSet) { existing = ds }
	listReq := &gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_NEIGHBOR, Name: setName}
	if err := nrc.bgpServer.ListDefinedSet(ctx, listReq, capture); err != nil {
		return peerCIDRs, err
	}

	// global peers come first, node specific peers second
	for _, router := range nrc.globalPeerRouters {
		peerCIDRs = append(peerCIDRs, router.Conf.NeighborAddress+"/32")
	}
	for _, address := range nrc.nodePeerRouters {
		peerCIDRs = append(peerCIDRs, address+"/32")
	}

	if len(peerCIDRs) == 0 || existing != nil {
		return peerCIDRs, nil
	}

	created := &gobgpapi.DefinedSet{
		DefinedType: gobgpapi.DefinedType_NEIGHBOR,
		Name:        setName,
		List:        peerCIDRs,
	}
	return peerCIDRs, nrc.bgpServer.AddDefinedSet(ctx, &gobgpapi.AddDefinedSetRequest{DefinedSet: created})
}
// addAllBGPPeersDefinedSet keeps the `allpeerset` neighbor defined set in line with the union of the
// given external and internal peer lists. That set is used as the neighbor match condition of the
// reject statements in the import policy.
//
// The defined set is created when it does not exist; otherwise peers missing from it are added and
// peers no longer wanted are deleted. Neither input slice is modified.
func (nrc *NetworkRoutingController) addAllBGPPeersDefinedSet(iBGPPeerCIDRs, externalBGPPeerCIDRs []string) error {
	var currentDefinedSet *gobgpapi.DefinedSet
	err := nrc.bgpServer.ListDefinedSet(context.Background(),
		&gobgpapi.ListDefinedSetRequest{DefinedType: gobgpapi.DefinedType_NEIGHBOR, Name: "allpeerset"},
		func(ds *gobgpapi.DefinedSet) {
			currentDefinedSet = ds
		})
	if err != nil {
		return err
	}

	// Build the union in a fresh slice. Appending straight onto externalBGPPeerCIDRs could write into
	// spare capacity of the caller's backing array.
	allBgpPeers := make([]string, 0, len(externalBGPPeerCIDRs)+len(iBGPPeerCIDRs))
	allBgpPeers = append(allBgpPeers, externalBGPPeerCIDRs...)
	allBgpPeers = append(allBgpPeers, iBGPPeerCIDRs...)

	if currentDefinedSet == nil {
		allPeerNS := &gobgpapi.DefinedSet{
			DefinedType: gobgpapi.DefinedType_NEIGHBOR,
			Name:        "allpeerset",
			List:        allBgpPeers,
		}
		return nrc.bgpServer.AddDefinedSet(context.Background(), &gobgpapi.AddDefinedSetRequest{DefinedSet: allPeerNS})
	}

	// Membership sets make the two-way diff linear instead of scanning one list per element of the other.
	wanted := make(map[string]struct{}, len(allBgpPeers))
	for _, peer := range allBgpPeers {
		wanted[peer] = struct{}{}
	}
	current := make(map[string]struct{}, len(currentDefinedSet.List))
	for _, currentPeer := range currentDefinedSet.List {
		current[currentPeer] = struct{}{}
	}

	toAdd := make([]string, 0)
	for _, peer := range allBgpPeers {
		if _, found := current[peer]; !found {
			toAdd = append(toAdd, peer)
		}
	}
	toDelete := make([]string, 0)
	for _, currentPeer := range currentDefinedSet.List {
		if _, found := wanted[currentPeer]; !found {
			toDelete = append(toDelete, currentPeer)
		}
	}

	allPeerNS := &gobgpapi.DefinedSet{
		DefinedType: gobgpapi.DefinedType_NEIGHBOR,
		Name:        "allpeerset",
		List:        toAdd,
	}
	err = nrc.bgpServer.AddDefinedSet(context.Background(), &gobgpapi.AddDefinedSetRequest{DefinedSet: allPeerNS})
	if err != nil {
		return err
	}

	allPeerNS = &gobgpapi.DefinedSet{
		DefinedType: gobgpapi.DefinedType_NEIGHBOR,
		Name:        "allpeerset",
		List:        toDelete,
	}
	// All: false removes just the listed peers rather than the defined set itself
	return nrc.bgpServer.DeleteDefinedSet(context.Background(),
		&gobgpapi.DeleteDefinedSetRequest{DefinedSet: allPeerNS, All: false})
}
// addExportPolicies adds the `kube_router_export` policy and assigns it as the global export policy
// with a default action of reject, so that the following conditions are met:
//
//   - by default the export of all routes from the RIB to the neighbours is denied, and explicit
//     statements are added to permit the desired routes to be exported
//   - each node is allowed to advertise its assigned pod CIDR to all of its iBGP peer neighbours with
//     the same ASN if --enable-ibgp=true
//   - each node is allowed to advertise its assigned pod CIDR to all of its external BGP peer
//     neighbours only if the --advertise-pod-cidr flag is set to true
//   - each node is NOT allowed to advertise its assigned pod CIDR to its external BGP peer neighbours
//     if the --advertise-pod-cidr flag is set to false
//   - each node is allowed to advertise service VIPs (cluster IP, load balancer IP, external IP) ONLY
//     to external BGP peers
//   - each node is NOT allowed to advertise service VIPs (cluster IP, load balancer IP, external IP) to
//     iBGP peers
//   - optionally the next-hop address is overridden with the outgoing IP for external BGP peers
//
// A policy or a policy assignment with that name that is already present is left as it is.
func (nrc *NetworkRoutingController) addExportPolicies() error {
	const policyName = "kube_router_export"
	ctx := context.Background()

	// every condition matches against "any" member of a named defined set
	anyOf := func(definedSet string) *gobgpapi.MatchSet {
		return &gobgpapi.MatchSet{MatchType: gobgpapi.MatchType_ANY, Name: definedSet}
	}

	exportStatements := []*gobgpapi.Statement{}

	// actions of the service VIP statement; AS path prepending, when enabled, applies to it only
	var vipActions gobgpapi.Actions
	if nrc.pathPrepend {
		asn, parseErr := strconv.ParseUint(nrc.pathPrependAS, 10, asnMaxBitSize)
		if parseErr != nil {
			return errors.New("Invalid value for kube-router.io/path-prepend.as: " + parseErr.Error())
		}
		vipActions.AsPrepend = &gobgpapi.AsPrependAction{
			Asn:    uint32(asn),
			Repeat: uint32(nrc.pathPrependCount),
		}
	}

	if nrc.bgpEnableInternal {
		// permit advertising the node's pod CIDR to the iBGP peers
		internalActions := &gobgpapi.Actions{RouteAction: gobgpapi.RouteAction_ACCEPT}
		if nrc.overrideNextHop {
			internalActions.Nexthop = &gobgpapi.NexthopAction{Self: true}
		}
		exportStatements = append(exportStatements, &gobgpapi.Statement{
			Conditions: &gobgpapi.Conditions{
				PrefixSet:   anyOf("podcidrdefinedset"),
				NeighborSet: anyOf("iBGPpeerset"),
			},
			Actions: internalActions,
		})
	}

	hasExternalPeers := len(nrc.globalPeerRouters) > 0 || len(nrc.nodePeerRouters) > 0
	if hasExternalPeers {
		vipActions.RouteAction = gobgpapi.RouteAction_ACCEPT
		if nrc.overrideNextHop {
			vipActions.Nexthop = &gobgpapi.NexthopAction{Self: true}
		}
		// BGP communities for the VIP routes advertised to peers
		if len(nrc.nodeCommunities) > 0 {
			vipActions.Community = &gobgpapi.CommunityAction{
				ActionType:  gobgpapi.CommunityActionType_COMMUNITY_ADD,
				Communities: nrc.nodeCommunities,
			}
		}

		// permit advertising the service VIPs, but only to the global or node specific BGP peers
		exportStatements = append(exportStatements, &gobgpapi.Statement{
			Conditions: &gobgpapi.Conditions{
				PrefixSet:   anyOf("servicevipsdefinedset"),
				NeighborSet: anyOf("externalpeerset"),
			},
			Actions: &vipActions,
		})

		if nrc.advertisePodCidr {
			podCidrActions := &gobgpapi.Actions{RouteAction: gobgpapi.RouteAction_ACCEPT}
			// BGP communities for the pod network routes advertised to peers
			if len(nrc.nodeCommunities) > 0 {
				podCidrActions.Community = &gobgpapi.CommunityAction{
					ActionType:  gobgpapi.CommunityActionType_COMMUNITY_ADD,
					Communities: nrc.nodeCommunities,
				}
			}
			if nrc.overrideNextHop {
				podCidrActions.Nexthop = &gobgpapi.NexthopAction{Self: true}
			}
			exportStatements = append(exportStatements, &gobgpapi.Statement{
				Conditions: &gobgpapi.Conditions{
					PrefixSet:   anyOf("podcidrdefinedset"),
					NeighborSet: anyOf("externalpeerset"),
				},
				Actions: podCidrActions,
			})
		}
	}

	exportPolicy := gobgpapi.Policy{Name: policyName, Statements: exportStatements}

	// add the policy unless one with the same name is already there
	policyFound := false
	err := nrc.bgpServer.ListPolicy(ctx, &gobgpapi.ListPolicyRequest{}, func(known *gobgpapi.Policy) {
		policyFound = policyFound || known.Name == policyName
	})
	if err != nil {
		return errors.New("Failed to verify if kube-router BGP export policy exists: " + err.Error())
	}
	if !policyFound {
		err = nrc.bgpServer.AddPolicy(ctx, &gobgpapi.AddPolicyRequest{Policy: &exportPolicy})
		if err != nil {
			return errors.New("Failed to add policy: " + err.Error())
		}
	}

	// assign the policy globally unless the global export assignment already lists it
	assignmentFound := false
	err = nrc.bgpServer.ListPolicyAssignment(ctx,
		&gobgpapi.ListPolicyAssignmentRequest{Name: "global", Direction: gobgpapi.PolicyDirection_EXPORT},
		func(assignment *gobgpapi.PolicyAssignment) {
			for _, assigned := range assignment.Policies {
				assignmentFound = assignmentFound || assigned.Name == policyName
			}
		})
	if err != nil {
		return errors.New("Failed to verify if kube-router BGP export policy assignment exists: " + err.Error())
	}
	if assignmentFound {
		return nil
	}

	globalExport := gobgpapi.PolicyAssignment{
		Name:          "global",
		Direction:     gobgpapi.PolicyDirection_EXPORT,
		Policies:      []*gobgpapi.Policy{&exportPolicy},
		DefaultAction: gobgpapi.RouteAction_REJECT,
	}
	err = nrc.bgpServer.AddPolicyAssignment(ctx, &gobgpapi.AddPolicyAssignmentRequest{Assignment: &globalExport})
	if err != nil {
		return errors.New("Failed to add policy assignment: " + err.Error())
	}
	return nil
}
// addImportPolicies adds the `kube_router_import` policy and assigns it as the global import policy
// with a default action of accept, so that the following conditions are met:
//   - Service VIPs advertised from any peer are not imported; instead each kube-router originates and
//     injects Service VIPs into the local RIB.
//   - prefixes matching `defaultroutedefinedset` advertised from any peer are rejected as well.
//
// A policy or a policy assignment with that name that is already present is left as it is.
func (nrc *NetworkRoutingController) addImportPolicies() error {
	const policyName = "kube_router_import"
	ctx := context.Background()

	// Both statements reject, and they share a single Actions value.
	reject := &gobgpapi.Actions{RouteAction: gobgpapi.RouteAction_REJECT}

	importStatements := []*gobgpapi.Statement{}
	for _, prefixSet := range []string{"servicevipsdefinedset", "defaultroutedefinedset"} {
		importStatements = append(importStatements, &gobgpapi.Statement{
			Conditions: &gobgpapi.Conditions{
				PrefixSet:   &gobgpapi.MatchSet{MatchType: gobgpapi.MatchType_ANY, Name: prefixSet},
				NeighborSet: &gobgpapi.MatchSet{MatchType: gobgpapi.MatchType_ANY, Name: "allpeerset"},
			},
			Actions: reject,
		})
	}

	importPolicy := gobgpapi.Policy{Name: policyName, Statements: importStatements}

	// add the policy unless one with the same name is already there
	policyFound := false
	err := nrc.bgpServer.ListPolicy(ctx, &gobgpapi.ListPolicyRequest{}, func(known *gobgpapi.Policy) {
		policyFound = policyFound || known.Name == policyName
	})
	if err != nil {
		return errors.New("Failed to verify if kube-router BGP import policy exists: " + err.Error())
	}
	if !policyFound {
		err = nrc.bgpServer.AddPolicy(ctx, &gobgpapi.AddPolicyRequest{Policy: &importPolicy})
		if err != nil {
			return errors.New("Failed to add policy: " + err.Error())
		}
	}

	// assign the policy globally unless the global import assignment already lists it
	assignmentFound := false
	err = nrc.bgpServer.ListPolicyAssignment(ctx,
		&gobgpapi.ListPolicyAssignmentRequest{Name: "global", Direction: gobgpapi.PolicyDirection_IMPORT},
		func(assignment *gobgpapi.PolicyAssignment) {
			for _, assigned := range assignment.Policies {
				assignmentFound = assignmentFound || assigned.Name == policyName
			}
		})
	if err != nil {
		return errors.New("Failed to verify if kube-router BGP import policy assignment exists: " + err.Error())
	}
	if assignmentFound {
		return nil
	}

	globalImport := gobgpapi.PolicyAssignment{
		Name:          "global",
		Direction:     gobgpapi.PolicyDirection_IMPORT,
		Policies:      []*gobgpapi.Policy{&importPolicy},
		DefaultAction: gobgpapi.RouteAction_ACCEPT,
	}
	err = nrc.bgpServer.AddPolicyAssignment(ctx, &gobgpapi.AddPolicyAssignmentRequest{Assignment: &globalImport})
	if err != nil {
		return errors.New("Failed to add policy assignment: " + err.Error())
	}
	return nil
}