mirror of
https://github.com/cloudnativelabs/kube-router.git
synced 2025-09-26 02:21:03 +02:00
407 lines
14 KiB
Go
407 lines
14 KiB
Go
package routing
|
|
|
|
import (
|
|
"bufio"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"os/exec"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
|
|
gobgpapi "github.com/osrg/gobgp/v3/api"
|
|
"github.com/osrg/gobgp/v3/pkg/packet/bgp"
|
|
"github.com/vishvananda/netlink/nl"
|
|
v1core "k8s.io/api/core/v1"
|
|
"k8s.io/klog/v2"
|
|
|
|
"github.com/vishvananda/netlink"
|
|
)
|
|
|
|
// stringToSlice breaks s apart on every occurrence of the delimiter d and returns the resulting pieces. It is used
// for processing annotations that may contain multiple items. When d does not occur in s, the result is a
// single-element slice holding s unchanged.
//
//nolint:unparam // while delimiter is always "," for now it provides flexibility to leave the function this way
func stringToSlice(s, d string) []string {
	// strings.Split already yields []string{s} when the delimiter is absent, so no separate branch is required.
	return strings.Split(s, d)
}
|
|
|
|
// stringSliceToIPs converts every element of s into a net.IP, preserving order. Leading and trailing whitespace
// around each element is ignored so that lists written as "10.0.0.1, 10.0.0.2" parse the same way they do in
// stringSliceToIPNets. If any element is not a valid IPv4 or IPv6 address, a nil slice and an error naming the
// offending element are returned. An empty input yields an empty, non-nil slice.
func stringSliceToIPs(s []string) ([]net.IP, error) {
	ips := make([]net.IP, 0, len(s))
	for _, ipString := range s {
		ip := net.ParseIP(strings.TrimSpace(ipString))
		if ip == nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipString)
		}
		ips = append(ips, ip)
	}
	return ips, nil
}
|
|
|
|
// stringSliceToIPNets converts every element of s from CIDR notation (e.g. "10.0.0.0/24") into a net.IPNet,
// preserving order. Leading and trailing whitespace around each element is ignored. The returned networks are
// the masked network addresses, so "10.1.2.3/16" becomes 10.1.0.0/16. If any element fails to parse, a nil slice
// and an error naming the offending element are returned. An empty input yields an empty, non-nil slice.
func stringSliceToIPNets(s []string) ([]net.IPNet, error) {
	ipNets := make([]net.IPNet, 0, len(s))
	for _, ipNetString := range s {
		// net.ParseCIDR returns a non-nil IP and network whenever err is nil, so err is the only thing that needs
		// to be checked here
		_, ipNet, err := net.ParseCIDR(strings.TrimSpace(ipNetString))
		if err != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an CIDR", ipNetString)
		}
		ipNets = append(ipNets, *ipNet)
	}
	return ipNets, nil
}
|
|
|
|
// stringSliceToUInt32 converts every element of s into a uint32, preserving order. Parsing is done with base 0,
// which means the base is taken from the element's prefix ("0x" for hexadecimal, "0" for octal, "0b" for binary,
// decimal otherwise). If any element is not an unsigned integer that fits in 32 bits, a nil slice and an error
// naming the offending element are returned. An empty input yields an empty, non-nil slice.
func stringSliceToUInt32(s []string) ([]uint32, error) {
	parsed := make([]uint32, 0)
	for idx := range s {
		value, parseErr := strconv.ParseUint(s[idx], 0, 32)
		if parseErr != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an integer", s[idx])
		}
		parsed = append(parsed, uint32(value))
	}
	return parsed, nil
}
|
|
|
|
// stringSliceB64Decode decodes every element of s from standard (padded) base64 and returns the decoded values
// as strings, preserving order. If any element is not valid base64, a nil slice and an error naming the
// offending element are returned. An empty input yields an empty, non-nil slice.
func stringSliceB64Decode(s []string) ([]string, error) {
	decodedStrings := make([]string, 0)
	for idx := range s {
		raw, decodeErr := base64.StdEncoding.DecodeString(s[idx])
		if decodeErr != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as a base64 encoded string",
				s[idx])
		}
		decodedStrings = append(decodedStrings, string(raw))
	}
	return decodedStrings, nil
}
|
|
|
|
func statementsEqualByName(a, b []*gobgpapi.Statement) bool {
|
|
// First a is in the outer loop ensuring that all members of a are in b
|
|
for _, st1 := range a {
|
|
st1Found := false
|
|
for _, st2 := range b {
|
|
if st1.Name == st2.Name {
|
|
st1Found = true
|
|
}
|
|
}
|
|
if !st1Found {
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Second b is in the outer loop ensuring that all members of b are in a
|
|
for _, st1 := range b {
|
|
st1Found := false
|
|
for _, st2 := range a {
|
|
if st1.Name == st2.Name {
|
|
st1Found = true
|
|
}
|
|
}
|
|
if !st1Found {
|
|
return false
|
|
}
|
|
}
|
|
|
|
// If we've made it through both loops then we know that the statements arrays are equal
|
|
return true
|
|
}
|
|
|
|
func getNodeSubnet(nodeIP net.IP) (net.IPNet, string, error) {
|
|
links, err := netlink.LinkList()
|
|
if err != nil {
|
|
return net.IPNet{}, "", errors.New("failed to get list of links")
|
|
}
|
|
for _, link := range links {
|
|
addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL)
|
|
if err != nil {
|
|
return net.IPNet{}, "", errors.New("failed to get list of addr")
|
|
}
|
|
for _, addr := range addresses {
|
|
if addr.IPNet.IP.Equal(nodeIP) {
|
|
return *addr.IPNet, link.Attrs().Name, nil
|
|
}
|
|
}
|
|
}
|
|
return net.IPNet{}, "", errors.New("failed to find interface with specified node ip")
|
|
}
|
|
|
|
// generateTunnelName derives a deterministic tunnel interface name from a node IP. Since linux restricts
// interface names to 15 characters, the separator characters ('.' for IPv4, ':' for IPv6) are stripped from the
// address, the remainder is hashed with SHA-256, and the first 11 hexadecimal characters of the digest are
// appended to the "tun-" prefix (4 + 11 = 15 characters). This caters to both long IPv4 addresses and much longer
// IPv6 addresses.
func generateTunnelName(nodeIP string) string {
	// drop the dots of an IPv4 address and the colons of an IPv6 address in a single pass
	stripped := strings.NewReplacer(".", "", ":", "").Replace(nodeIP)

	digest := sha256.Sum256([]byte(stripped))
	hexDigest := fmt.Sprintf("%x", digest[:])

	return "tun-" + hexDigest[:11]
}
|
|
|
|
// validateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to
|
|
// gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it
|
|
// returns an error.
|
|
func validateCommunity(arg string) error {
|
|
_, err := strconv.ParseUint(arg, 10, bgpCommunityMaxSize)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
_regexpCommunity := regexp.MustCompile(`(\d+):(\d+)`)
|
|
elems := _regexpCommunity.FindStringSubmatch(arg)
|
|
if len(elems) == 3 {
|
|
if _, err := strconv.ParseUint(elems[1], 10, bgpCommunityMaxPartSize); err == nil {
|
|
if _, err = strconv.ParseUint(elems[2], 10, bgpCommunityMaxPartSize); err == nil {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
for _, v := range bgp.WellKnownCommunityNameMap {
|
|
if arg == v {
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("failed to parse %s as community", arg)
|
|
}
|
|
|
|
// parseBGPNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it
|
|
// can't parse a next hop IP from the GoBGP Path, it returns an error.
|
|
func parseBGPNextHop(path *gobgpapi.Path) (net.IP, error) {
|
|
for _, pAttr := range path.GetPattrs() {
|
|
unmarshalNew, err := pAttr.UnmarshalNew()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err)
|
|
}
|
|
switch t := unmarshalNew.(type) {
|
|
case *gobgpapi.NextHopAttribute:
|
|
// This is the primary way that we receive NextHops and happens when both the client and the server exchange
|
|
// next hops on the same IP family that they negotiated BGP on
|
|
nextHopIP := net.ParseIP(t.NextHop)
|
|
if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) {
|
|
return nextHopIP, nil
|
|
}
|
|
return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop)
|
|
case *gobgpapi.MpReachNLRIAttribute:
|
|
// in the case where the server and the client are exchanging next-hops that don't relate to their primary
|
|
// IP family, we get MpReachNLRIAttribute instead of NextHopAttributes
|
|
// TODO: here we only take the first next hop, at some point in the future it would probably be best to
|
|
// consider multiple next hops
|
|
nextHopIP := net.ParseIP(t.NextHops[0])
|
|
if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) {
|
|
return nextHopIP, nil
|
|
}
|
|
return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0])
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path)
|
|
}
|
|
|
|
// parseBGPPath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes.
|
|
// If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it
|
|
// can't parse the destination or the next hop IP, it returns an error.
|
|
func parseBGPPath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) {
|
|
nextHop, err := parseBGPNextHop(path)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
nlri := path.GetNlri()
|
|
var prefix gobgpapi.IPAddressPrefix
|
|
err = nlri.UnmarshalTo(&prefix)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("invalid nlri in advertised path")
|
|
}
|
|
dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen))
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path")
|
|
}
|
|
return dstSubnet, nextHop, nil
|
|
}
|
|
|
|
// deleteRoutesByDestination attempts to safely find all routes based upon its destination subnet and delete them
|
|
func deleteRoutesByDestination(destinationSubnet *net.IPNet) error {
|
|
routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{
|
|
Dst: destinationSubnet, Protocol: zebraRouteOriginator,
|
|
}, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get routes from netlink: %v", err)
|
|
}
|
|
for i, r := range routes {
|
|
klog.V(2).Infof("Found route to remove: %s", r.String())
|
|
if err = netlink.RouteDel(&routes[i]); err != nil {
|
|
return fmt.Errorf("failed to remove route due to %v", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// getPodCIDRsFromAllNodeSources gets the pod CIDRs for all available sources on a given node in a specific order. The
|
|
// order of preference is:
|
|
// 1. From the kube-router.io/pod-cidr annotation (preserves backwards compatibility)
|
|
// 2. From the kube-router.io/pod-cidrs annotation (allows the user to specify multiple CIDRs for a given node which
|
|
// seems to be closer aligned to how upstream is moving)
|
|
// 3. From the node's spec definition in node.Spec.PodCIDRs
|
|
func getPodCIDRsFromAllNodeSources(node *v1core.Node) (podCIDRs []string) {
|
|
// Prefer kube-router.io/pod-cidr as a matter of keeping backwards compatibility with previous functionality
|
|
podCIDR := node.GetAnnotations()["kube-router.io/pod-cidr"]
|
|
if podCIDR != "" {
|
|
_, _, err := net.ParseCIDR(podCIDR)
|
|
if err != nil {
|
|
klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidr annotation, skipping...", podCIDR)
|
|
} else {
|
|
podCIDRs = append(podCIDRs, podCIDR)
|
|
return podCIDRs
|
|
}
|
|
}
|
|
|
|
// Then attempt to find the annotation kube-router.io/pod-cidrs and prefer those second
|
|
cidrsAnnotation := node.GetAnnotations()["kube-router.io/pod-cidrs"]
|
|
if cidrsAnnotation != "" {
|
|
// this should contain comma separated CIDRs, any CIDRs which fail to parse we will emit a warning log for
|
|
// and skip it
|
|
cidrsAnnotArray := strings.Split(cidrsAnnotation, ",")
|
|
for _, cidr := range cidrsAnnotArray {
|
|
_, _, err := net.ParseCIDR(cidr)
|
|
if err != nil {
|
|
klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidrs annotation, skipping...",
|
|
cidr)
|
|
continue
|
|
}
|
|
podCIDRs = append(podCIDRs, cidr)
|
|
}
|
|
return podCIDRs
|
|
}
|
|
|
|
// Finally, if all else fails, use the PodCIDRs on the node spec
|
|
return node.Spec.PodCIDRs
|
|
}
|
|
|
|
// getBGPRouteInfoForVIP attempt to automatically find the subnet, BGP AFI/SAFI Family, and nexthop for a given VIP
|
|
// based upon whether it is an IPv4 address or an IPv6 address. Returns slash notation subnet as uint32 suitable for
|
|
// sending to GoBGP and an error if it is unable to determine the subnet automatically
|
|
func (nrc *NetworkRoutingController) getBGPRouteInfoForVIP(vip string) (subnet uint32, nh string,
|
|
afiFamily gobgpapi.Family_Afi, err error) {
|
|
ip := net.ParseIP(vip)
|
|
if ip == nil {
|
|
err = fmt.Errorf("could not parse VIP: %s", vip)
|
|
return
|
|
}
|
|
if ip.To4() != nil {
|
|
subnet = 32
|
|
afiFamily = gobgpapi.Family_AFI_IP
|
|
nhIP := utils.FindBestIPv4NodeAddress(nrc.primaryIP, nrc.nodeIPv4Addrs)
|
|
if nhIP == nil {
|
|
err = fmt.Errorf("could not find an IPv4 address on node to set as nexthop for vip: %s", vip)
|
|
}
|
|
nh = nhIP.String()
|
|
return
|
|
}
|
|
if ip.To16() != nil {
|
|
subnet = 128
|
|
afiFamily = gobgpapi.Family_AFI_IP6
|
|
nhIP := utils.FindBestIPv6NodeAddress(nrc.primaryIP, nrc.nodeIPv6Addrs)
|
|
if nhIP == nil {
|
|
err = fmt.Errorf("could not find an IPv6 address on node to set as nexthop for vip: %s", vip)
|
|
}
|
|
nh = nhIP.String()
|
|
return
|
|
}
|
|
err = fmt.Errorf("could not convert IP to IPv4 or IPv6, unable to find subnet for: %s", vip)
|
|
return
|
|
}
|
|
|
|
// fouPortAndProtoExist checks to see if the given FoU port is already configured on the system via iproute2
|
|
// tooling for the given protocol
|
|
//
|
|
// fou show, shows both IPv4 and IPv6 ports in the same show command, they look like:
|
|
// port 5556 gue
|
|
// port 5556 gue -6
|
|
// where the only thing that distinguishes them is the -6 or not on the end
|
|
// WARNING we're parsing a CLI tool here not an API, this may break at some point in the future
|
|
func fouPortAndProtoExist(port uint16, isIPv6 bool) bool {
|
|
const ipRoute2IPv6Prefix = "-6"
|
|
strPort := strconv.FormatInt(int64(port), 10)
|
|
fouArgs := make([]string, 0)
|
|
klog.V(2).Infof("Checking FOU Port and Proto... %s - %t", strPort, isIPv6)
|
|
|
|
if isIPv6 {
|
|
fouArgs = append(fouArgs, ipRoute2IPv6Prefix)
|
|
}
|
|
fouArgs = append(fouArgs, "fou", "show")
|
|
|
|
out, err := exec.Command("ip", fouArgs...).CombinedOutput()
|
|
// iproute2 returns an error if no fou configuration exists
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
strOut := string(out)
|
|
klog.V(2).Infof("Combined output of ip fou show: %s", strOut)
|
|
scanner := bufio.NewScanner(strings.NewReader(strOut))
|
|
|
|
// loop over all lines of output
|
|
for scanner.Scan() {
|
|
scannedLine := scanner.Text()
|
|
// if the output doesn't contain our port at all, then continue
|
|
if !strings.Contains(scannedLine, strPort) {
|
|
continue
|
|
}
|
|
|
|
// if this is IPv6 port and it has the correct IPv6 suffix (see example above) then return true
|
|
if isIPv6 && strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) {
|
|
return true
|
|
}
|
|
|
|
// if this is not IPv6 and it does not have an IPv6 suffix (see example above) then return true
|
|
if !isIPv6 && !strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// linkFOUEnabled checks to see whether the given link has FoU (Foo over Ethernet) enabled on it, specifically since
|
|
// kube-router only works with GUE (Generic UDP Encapsulation) we look for that and not just FoU in general. If the
|
|
// linkName is enabled with FoU GUE then we return true, otherwise false
|
|
//
|
|
// Output for a FoU Enabled GUE tunnel looks like:
|
|
// ipip ipip remote <ip> local <ip> dev <dev> ttl 225 pmtudisc encap gue encap-sport auto encap-dport 5555 ...
|
|
// Output for a normal IPIP tunnel looks like:
|
|
// ipip ipip remote <ip> local <ip> dev <dev> ttl inherit ...
|
|
func linkFOUEnabled(linkName string) bool {
|
|
const fouEncapEnabled = "encap gue"
|
|
cmdArgs := []string{"-details", "link", "show", linkName}
|
|
|
|
out, err := exec.Command("ip", cmdArgs...).CombinedOutput()
|
|
|
|
if err != nil {
|
|
klog.Warning("recevied an error while trying to look at the link details of %s, this shouldn't have happened",
|
|
linkName)
|
|
return false
|
|
}
|
|
|
|
if strings.Contains(string(out), fouEncapEnabled) {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|