Aaron U'Ren 9fd46cc86d fact(krnode): add node struct abstraction
This prepares the way for broader refactors in the way that we handle
nodes by:

* Separating frequently used node logic from the controller creation
  steps
* Keeping reused code DRY-er
* Adding interface abstractions for key groups of node data and starting
  to rely on those more rather than concrete types
* Separating node data from the rest of the controller data structure so
  that smaller definitions of data can be passed around to functions
  that need it rather than always passing the entire controller which
  contains more data / surface area than most functions need.
2024-09-29 17:53:36 -05:00

412 lines
14 KiB
Go

package routing
import (
"bufio"
"crypto/sha256"
"encoding/base64"
"encoding/binary"
"errors"
"fmt"
"hash/fnv"
"net"
"os/exec"
"regexp"
"strconv"
"strings"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
gobgpapi "github.com/osrg/gobgp/v3/api"
"github.com/osrg/gobgp/v3/pkg/packet/bgp"
"github.com/vishvananda/netlink/nl"
v1core "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"github.com/vishvananda/netlink"
)
// stringToSlice breaks s apart on the delimiter d. It is used for processing annotations that may carry several
// items in a single value. When d does not occur in s the result is a one-element slice holding s unchanged.
//
//nolint:unparam // while delimiter is always "," for now it provides flexibility to leave the function this way
func stringToSlice(s, d string) []string {
	// No delimiter present: the whole string is the only item
	if !strings.Contains(s, d) {
		return []string{s}
	}
	return strings.Split(s, d)
}
// stringSliceToIPs parses every element of s as an IPv4 or IPv6 address and returns the parsed addresses in input
// order. Whitespace surrounding an element is ignored so that lists written as "10.0.0.1, 10.0.0.2" parse the same
// way they do in stringSliceToIPNets. If any element is not a valid IP address, parsing stops and an error naming
// the offending (untrimmed) element is returned together with a nil slice.
func stringSliceToIPs(s []string) ([]net.IP, error) {
	ips := make([]net.IP, 0, len(s))
	for _, ipString := range s {
		ip := net.ParseIP(strings.TrimSpace(ipString))
		if ip == nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipString)
		}
		ips = append(ips, ip)
	}
	return ips, nil
}
// stringSliceToIPNets parses every element of s as a CIDR (e.g. "10.0.0.0/24" or "2001:db8::/64") and returns the
// resulting networks in input order. Whitespace surrounding an element is ignored. If any element is not a valid
// CIDR, parsing stops and an error naming the offending (untrimmed) element is returned together with a nil slice.
func stringSliceToIPNets(s []string) ([]net.IPNet, error) {
	ipNets := make([]net.IPNet, 0, len(s))
	for _, ipNetString := range s {
		// net.ParseCIDR never returns a nil IP together with a nil error, so the error is the only failure signal
		// that needs to be checked here
		_, ipNet, err := net.ParseCIDR(strings.TrimSpace(ipNetString))
		if err != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an CIDR", ipNetString)
		}
		ipNets = append(ipNets, *ipNet)
	}
	return ipNets, nil
}
// stringSliceToUInt32 parses every element of s as an unsigned 32-bit integer and returns the values in input
// order. The base is inferred from the element's prefix (e.g. "0x" for hexadecimal, a leading "0" for octal)
// because strconv.ParseUint is called with base 0. Whitespace surrounding an element is ignored so that lists
// written as "64512, 64513" parse the same way they do in stringSliceToIPNets. If any element cannot be parsed, or
// does not fit into 32 bits, an error naming the offending (untrimmed) element is returned together with a nil slice.
func stringSliceToUInt32(s []string) ([]uint32, error) {
	ints := make([]uint32, 0, len(s))
	for _, intString := range s {
		newInt, err := strconv.ParseUint(strings.TrimSpace(intString), 0, 32)
		if err != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as an integer", intString)
		}
		// the conversion cannot truncate as ParseUint was limited to a bit size of 32 above
		ints = append(ints, uint32(newInt))
	}
	return ints, nil
}
// stringSliceB64Decode decodes every element of s from standard (padded) base64 and returns the decoded values as
// strings in input order. Decoding stops at the first element that is not valid base64, in which case an error
// naming that element is returned together with a nil slice.
func stringSliceB64Decode(s []string) ([]string, error) {
	decodedStrings := make([]string, 0)
	for idx := range s {
		raw, err := base64.StdEncoding.DecodeString(s[idx])
		if err != nil {
			return nil, fmt.Errorf("could not parse \"%s\" as a base64 encoded string",
				s[idx])
		}
		decodedStrings = append(decodedStrings, string(raw))
	}
	return decodedStrings, nil
}
// statementsEqualByName reports whether a and b reference the same set of statement names. Only the Name field of
// each statement is compared; ordering and duplicates do not matter. Every name in a must appear in b and every
// name in b must appear in a for the two slices to be considered equal.
func statementsEqualByName(a, b []*gobgpapi.Statement) bool {
	// allNamesPresent reports whether the name of every statement in wanted also occurs somewhere in available
	allNamesPresent := func(wanted, available []*gobgpapi.Statement) bool {
		for _, want := range wanted {
			matched := false
			for _, candidate := range available {
				if want.Name == candidate.Name {
					matched = true
				}
			}
			if !matched {
				return false
			}
		}
		return true
	}

	// Containment has to hold in both directions for the statement slices to be equal
	return allNamesPresent(a, b) && allNamesPresent(b, a)
}
// generateRouterID determines the BGP router ID to use based upon the user's configuration (or lack thereof) and
// the node's primary IP address:
//
//   - If configRouterID is "generate", the router ID is derived by FNV-1a (32-bit) hashing the bytes returned by
//     nodeIPAware.GetPrimaryNodeIP() and rendering the 4 byte big-endian hash in dotted IPv4 notation.
//   - If configRouterID is any other non-empty value, it is returned unchanged.
//   - If configRouterID is empty, the node's primary IP address is used as the router ID. This is only possible
//     when the primary IP is an IPv4 address; for an IPv6 primary IP an error is returned because the user must
//     specify the router ID explicitly.
func generateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (string, error) {
	switch {
	case configRouterID == "generate":
		h := fnv.New32a()
		// hash.Hash documents that Write never returns an error, so the result is intentionally not checked
		h.Write(nodeIPAware.GetPrimaryNodeIP())
		gip := make(net.IP, 4)
		binary.BigEndian.PutUint32(gip, h.Sum32())
		return gip.String(), nil
	case configRouterID != "":
		return configRouterID, nil
	}

	// Nothing was configured, fall back to the primary node IP which is only usable when it is an IPv4 address
	primaryIP := nodeIPAware.GetPrimaryNodeIP()
	if primaryIP.To4() == nil {
		return "", errors.New("router-id must be specified when primary node IP is an IPv6 address")
	}
	return primaryIP.String(), nil
}
// generateTunnelName derives the name of a tunnel interface from a node IP. Since linux restricts interface names
// to 15 characters the address itself cannot be embedded in the name. Instead the non-entropic separator characters
// ('.' for IPv4 and ':' for IPv6) are stripped, the remainder is hashed with SHA-256, and the first 11 hexadecimal
// characters of the digest are appended to the "tun-" prefix. The result is always 15 characters long, for short
// IPv4 addresses and much longer IPv6 addresses alike.
func generateTunnelName(nodeIP string) string {
	// drop dots (IPv4) and colons (IPv6) in a single pass
	separatorStripper := strings.NewReplacer(".", "", ":", "")
	digest := sha256.Sum256([]byte(separatorStripper.Replace(nodeIP)))
	hexDigest := fmt.Sprintf("%x", digest[:])
	return "tun-" + hexDigest[:11]
}
// communityPairRegexp matches the "<number>:<number>" notation of a BGP community. It is compiled once at package
// initialization rather than on every validateCommunity call.
//
// NOTE(review): the pattern is not anchored, so any argument that merely contains a "<number>:<number>" pair is
// matched. This is presumably kept to stay in line with GoBGP's own community parsing - confirm before anchoring.
var communityPairRegexp = regexp.MustCompile(`(\d+):(\d+)`)

// validateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to
// gobgp (internal/pkg/table/policy.go:ParseCommunity()). A community is accepted when it is any one of:
//
//   - a base 10 unsigned integer that fits into bgpCommunityMaxSize bits
//   - a "<number>:<number>" pair where both numbers are base 10 and each fits into bgpCommunityMaxPartSize bits
//   - one of the well known community names from bgp.WellKnownCommunityNameMap
//
// If it is not able to parse the community information by any of these means it returns an error.
func validateCommunity(arg string) error {
	if _, err := strconv.ParseUint(arg, 10, bgpCommunityMaxSize); err == nil {
		return nil
	}

	if elems := communityPairRegexp.FindStringSubmatch(arg); len(elems) == 3 {
		_, highErr := strconv.ParseUint(elems[1], 10, bgpCommunityMaxPartSize)
		_, lowErr := strconv.ParseUint(elems[2], 10, bgpCommunityMaxPartSize)
		if highErr == nil && lowErr == nil {
			return nil
		}
	}

	for _, v := range bgp.WellKnownCommunityNameMap {
		if arg == v {
			return nil
		}
	}

	return fmt.Errorf("failed to parse %s as community", arg)
}
// parseBGPNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. The path
// attributes are inspected in order and the first NextHopAttribute or MpReachNLRIAttribute that is encountered
// decides the result. It returns an error if an attribute cannot be unmarshalled, if the next hop found is not a
// valid IP address, if an MpReachNLRIAttribute carries no next hops at all, or if none of the attributes carry a
// next hop.
func parseBGPNextHop(path *gobgpapi.Path) (net.IP, error) {
	for _, pAttr := range path.GetPattrs() {
		unmarshalNew, err := pAttr.UnmarshalNew()
		if err != nil {
			return nil, fmt.Errorf("failed to unmarshal path attribute: %w", err)
		}
		switch t := unmarshalNew.(type) {
		case *gobgpapi.NextHopAttribute:
			// This is the primary way that we receive NextHops and happens when both the client and the server
			// exchange next hops on the same IP family that they negotiated BGP on
			//
			// net.ParseIP only returns a non-nil result for a valid IPv4 or IPv6 address, so no further checks on
			// the parsed IP are needed
			nextHopIP := net.ParseIP(t.NextHop)
			if nextHopIP == nil {
				return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop)
			}
			return nextHopIP, nil
		case *gobgpapi.MpReachNLRIAttribute:
			// in the case where the server and the client are exchanging next-hops that don't relate to their
			// primary IP family, we get MpReachNLRIAttribute instead of NextHopAttributes
			//
			// guard against an attribute without next hops so that we return an error instead of panicking on
			// the index below
			if len(t.NextHops) == 0 {
				return nil, fmt.Errorf("no next hops found in MpReachNLRIAttribute for path: %s", path)
			}
			// TODO: here we only take the first next hop, at some point in the future it would probably be best
			// to consider multiple next hops
			nextHopIP := net.ParseIP(t.NextHops[0])
			if nextHopIP == nil {
				return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0])
			}
			return nextHopIP, nil
		}
	}
	return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path)
}
// parseBGPPath extracts both the destination subnet and the next hop from a GoBGP Path. The next hop is resolved
// via parseBGPNextHop, while the destination is built from the IP address prefix carried in the path's NLRI. On
// success the destination subnet and next hop are returned; if either of them cannot be parsed an error is
// returned instead.
func parseBGPPath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) {
	nextHop, err := parseBGPNextHop(path)
	if err != nil {
		return nil, nil, err
	}

	// The NLRI is expected to unmarshal into an IP address prefix (address + prefix length)
	prefix := &gobgpapi.IPAddressPrefix{}
	if err = path.GetNlri().UnmarshalTo(prefix); err != nil {
		return nil, nil, fmt.Errorf("invalid nlri in advertised path")
	}

	// Re-assemble the prefix into slash notation so that it can be parsed into a subnet
	cidr := fmt.Sprintf("%s/%v", prefix.Prefix, prefix.PrefixLen)
	dstSubnet, err := netlink.ParseIPNet(cidr)
	if err != nil {
		return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path")
	}

	return dstSubnet, nextHop, nil
}
// deleteRoutesByDestination looks up the routes of all address families whose destination equals destinationSubnet
// and whose protocol equals zebraRouteOriginator, and deletes each of them via netlink. Filtering on both fields
// keeps routes to the same destination that carry a different protocol untouched. Deletion stops at the first
// route that fails to be removed, and an error is returned if the routes cannot be listed or a removal fails.
func deleteRoutesByDestination(destinationSubnet *net.IPNet) error {
	routeFilter := &netlink.Route{Dst: destinationSubnet, Protocol: zebraRouteOriginator}
	filterMask := netlink.RT_FILTER_DST | netlink.RT_FILTER_PROTOCOL

	routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, routeFilter, filterMask)
	if err != nil {
		return fmt.Errorf("failed to get routes from netlink: %v", err)
	}

	for idx := range routes {
		route := &routes[idx]
		klog.V(2).Infof("Found route to remove: %s", route.String())
		if err = netlink.RouteDel(route); err != nil {
			return fmt.Errorf("failed to remove route due to %v", err)
		}
	}

	return nil
}
// getPodCIDRsFromAllNodeSources gets the pod CIDRs for a node by consulting all available sources in a fixed order
// of preference and returning the result of the first source that applies:
//  1. The kube-router.io/pod-cidr annotation (preserves backwards compatibility). It is only used when it parses
//     as a valid CIDR, otherwise a warning is logged and the next source is consulted.
//  2. The kube-router.io/pod-cidrs annotation, which holds comma separated CIDRs (allows the user to specify
//     multiple CIDRs for a given node). Entries that fail to parse are logged and skipped. Whenever this
//     annotation is non-empty its (possibly empty) set of valid entries is returned.
//  3. The node's spec definition in node.Spec.PodCIDRs
func getPodCIDRsFromAllNodeSources(node *v1core.Node) (podCIDRs []string) {
	annotations := node.GetAnnotations()

	// Prefer kube-router.io/pod-cidr as a matter of keeping backwards compatibility with previous functionality
	if singleCIDR := annotations["kube-router.io/pod-cidr"]; singleCIDR != "" {
		if _, _, err := net.ParseCIDR(singleCIDR); err == nil {
			return []string{singleCIDR}
		}
		klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidr annotation, skipping...", singleCIDR)
	}

	// Then attempt to find the annotation kube-router.io/pod-cidrs and prefer those second
	if multiCIDRs := annotations["kube-router.io/pod-cidrs"]; multiCIDRs != "" {
		// this should contain comma separated CIDRs, any CIDRs which fail to parse we will emit a warning log for
		// and skip it
		for _, candidate := range strings.Split(multiCIDRs, ",") {
			if _, _, err := net.ParseCIDR(candidate); err != nil {
				klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidrs annotation, skipping...",
					candidate)
				continue
			}
			podCIDRs = append(podCIDRs, candidate)
		}
		return podCIDRs
	}

	// Finally, if all else fails, use the PodCIDRs on the node spec
	return node.Spec.PodCIDRs
}
// getBGPRouteInfoForVIP attempts to automatically find the subnet, BGP AFI family, and nexthop for a given VIP
// based upon whether it is an IPv4 address or an IPv6 address.
//
// On success it returns:
//   - subnet: the prefix length in slash notation as a uint32 suitable for sending to GoBGP (32 for IPv4 and 128
//     for IPv6)
//   - nh: the string form of the node address that nrc.krNode selects as best for the VIP's IP family
//   - afiFamily: gobgpapi.Family_AFI_IP for IPv4 and gobgpapi.Family_AFI_IP6 for IPv6
//
// An error is returned if the VIP cannot be parsed as an IP address or if the node has no address of the VIP's IP
// family to use as a nexthop. When an error is returned, all other results are left at their zero values.
func (nrc *NetworkRoutingController) getBGPRouteInfoForVIP(vip string) (subnet uint32, nh string,
	afiFamily gobgpapi.Family_Afi, err error) {
	ip := net.ParseIP(vip)
	if ip == nil {
		err = fmt.Errorf("could not parse VIP: %s", vip)
		return
	}
	if ip.To4() != nil {
		nhIP := nrc.krNode.FindBestIPv4NodeAddress()
		if nhIP == nil {
			// return right away so that a "<nil>" nexthop is never handed back alongside the error
			err = fmt.Errorf("could not find an IPv4 address on node to set as nexthop for vip: %s", vip)
			return
		}
		subnet = 32
		afiFamily = gobgpapi.Family_AFI_IP
		nh = nhIP.String()
		return
	}
	if ip.To16() != nil {
		nhIP := nrc.krNode.FindBestIPv6NodeAddress()
		if nhIP == nil {
			// return right away so that a "<nil>" nexthop is never handed back alongside the error
			err = fmt.Errorf("could not find an IPv6 address on node to set as nexthop for vip: %s", vip)
			return
		}
		subnet = 128
		afiFamily = gobgpapi.Family_AFI_IP6
		nh = nhIP.String()
		return
	}
	// Defensive fallback: an IP that was parsed successfully is expected to match one of the two branches above
	err = fmt.Errorf("could not convert IP to IPv4 or IPv6, unable to find subnet for: %s", vip)
	return
}
// fouPortAndProtoExist checks to see if the given FoU port is already configured on the system via iproute2
// tooling for the given protocol. It returns true only when a line of the "ip fou show" output is for exactly the
// given port and has the matching IP family marker. It returns false if the command fails or no such line exists.
//
// fou show, shows both IPv4 and IPv6 ports in the same show command, they look like:
//
//	port 5556 gue
//	port 5556 gue -6
//
// where the only thing that distinguishes them is the -6 or not on the end
//
// The port is compared as a whole token following the "port" keyword rather than as a substring of the line,
// otherwise a lookup for port 555 would be satisfied by a line for port 5556.
//
// WARNING we're parsing a CLI tool here not an API, this may break at some point in the future
func fouPortAndProtoExist(port uint16, isIPv6 bool) bool {
	const (
		ipRoute2IPv6Prefix = "-6"
		portKeyword        = "port"
	)
	strPort := strconv.FormatInt(int64(port), 10)
	fouArgs := make([]string, 0, 3)

	klog.V(2).Infof("Checking FOU Port and Proto... %s - %t", strPort, isIPv6)

	if isIPv6 {
		fouArgs = append(fouArgs, ipRoute2IPv6Prefix)
	}
	fouArgs = append(fouArgs, "fou", "show")

	out, err := exec.Command("ip", fouArgs...).CombinedOutput()
	// iproute2 returns an error if no fou configuration exists
	if err != nil {
		return false
	}

	strOut := string(out)
	klog.V(2).Infof("Combined output of ip fou show: %s", strOut)

	scanner := bufio.NewScanner(strings.NewReader(strOut))

	// loop over all lines of output
	for scanner.Scan() {
		scannedLine := scanner.Text()

		// look for the "port <number>" token pair so that only an exact port match counts
		fields := strings.Fields(scannedLine)
		portFound := false
		for i := 0; i+1 < len(fields); i++ {
			if fields[i] == portKeyword && fields[i+1] == strPort {
				portFound = true
				break
			}
		}
		// if the output isn't for our port at all, then continue
		if !portFound {
			continue
		}

		// an IPv6 port must carry the IPv6 suffix and an IPv4 port must not (see example above)
		if isIPv6 == strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) {
			return true
		}
	}

	return false
}
// linkFOUEnabled reports whether the given link has FoU (Foo-over-UDP) enabled on it. Since kube-router only works
// with GUE (Generic UDP Encapsulation) we look for that specifically and not just FoU in general: the output of
// "ip -details link show <linkName>" is searched for the text "encap gue". It returns true when that text is
// found, and false when it is absent or when the ip command fails (in which case a warning is logged).
//
// Output for a FoU Enabled GUE tunnel looks like:
//
//	ipip ipip remote <ip> local <ip> dev <dev> ttl 225 pmtudisc encap gue encap-sport auto encap-dport 5555 ...
//
// Output for a normal IPIP tunnel looks like:
//
//	ipip ipip remote <ip> local <ip> dev <dev> ttl inherit ...
func linkFOUEnabled(linkName string) bool {
	const fouEncapEnabled = "encap gue"

	out, err := exec.Command("ip", "-details", "link", "show", linkName).CombinedOutput()
	if err != nil {
		klog.Warningf("recevied an error while trying to look at the link details of %s, this shouldn't have happened",
			linkName)
		return false
	}

	return strings.Contains(string(out), fouEncapEnabled)
}