package routing import ( "bufio" "crypto/sha256" "encoding/base64" "errors" "fmt" "net" "os/exec" "regexp" "strconv" "strings" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" gobgpapi "github.com/osrg/gobgp/v3/api" "github.com/osrg/gobgp/v3/pkg/packet/bgp" "github.com/vishvananda/netlink/nl" v1core "k8s.io/api/core/v1" "k8s.io/klog/v2" "github.com/vishvananda/netlink" ) // Used for processing Annotations that may contain multiple items // Pass this the string and the delimiter // //nolint:unparam // while delimiter is always "," for now it provides flexibility to leave the function this way func stringToSlice(s, d string) []string { ss := make([]string, 0) if strings.Contains(s, d) { ss = strings.Split(s, d) } else { ss = append(ss, s) } return ss } func stringSliceToIPs(s []string) ([]net.IP, error) { ips := make([]net.IP, 0) for _, ipString := range s { ip := net.ParseIP(ipString) if ip == nil { return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipString) } ips = append(ips, ip) } return ips, nil } func stringSliceToIPNets(s []string) ([]net.IPNet, error) { ipNets := make([]net.IPNet, 0) for _, ipNetString := range s { ip, ipNet, err := net.ParseCIDR(strings.TrimSpace(ipNetString)) if err != nil { return nil, fmt.Errorf("could not parse \"%s\" as an CIDR", ipNetString) } if ip == nil { return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipNetString) } ipNets = append(ipNets, *ipNet) } return ipNets, nil } func stringSliceToUInt32(s []string) ([]uint32, error) { ints := make([]uint32, 0) for _, intString := range s { newInt, err := strconv.ParseUint(intString, 0, 32) if err != nil { return nil, fmt.Errorf("could not parse \"%s\" as an integer", intString) } ints = append(ints, uint32(newInt)) } return ints, nil } func stringSliceB64Decode(s []string) ([]string, error) { ss := make([]string, 0) for _, b64String := range s { decoded, err := base64.StdEncoding.DecodeString(b64String) if err != nil { return nil, fmt.Errorf("could not parse \"%s\" as a base64 encoded string", b64String) } ss = append(ss, string(decoded)) } return ss, nil } func statementsEqualByName(a, b []*gobgpapi.Statement) bool { // First a is in the outer loop ensuring that all members of a are in b for _, st1 := range a { st1Found := false for _, st2 := range b { if st1.Name == st2.Name { st1Found = true } } if !st1Found { return false } } // Second b is in the outer loop ensuring that all members of b are in a for _, st1 := range b { st1Found := false for _, st2 := range a { if st1.Name == st2.Name { st1Found = true } } if !st1Found { return false } } // If we've made it through both loops then we know that the statements arrays are equal return true } func getNodeSubnet(nodeIP net.IP) (net.IPNet, string, error) { links, err := netlink.LinkList() if err != nil { return net.IPNet{}, "", errors.New("failed to get list of links") } for _, link := range links { addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL) if err != nil { return net.IPNet{}, "", errors.New("failed to get list of addr") } for _, addr := range addresses { if addr.IPNet.IP.Equal(nodeIP) { return *addr.IPNet, link.Attrs().Name, nil } } } return net.IPNet{}, "", errors.New("failed to find interface with specified node ip") } // generateTunnelName will generate a name for a tunnel interface given a node IP // Since linux restricts interface names to 15 characters, we take the sha-256 of the node IP after removing // non-entropic characters like '.' and ':', and then use the first 12 bytes of it. This allows us to cater to both // long IPv4 addresses and much longer IPv6 addresses. func generateTunnelName(nodeIP string) string { // remove dots from an IPv4 address strippedIP := strings.ReplaceAll(nodeIP, ".", "") // remove colons from an IPv6 address strippedIP = strings.ReplaceAll(strippedIP, ":", "") h := sha256.New() h.Write([]byte(strippedIP)) sum := h.Sum(nil) return "tun-" + fmt.Sprintf("%x", sum)[0:11] } // validateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to // gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it // returns an error. func validateCommunity(arg string) error { _, err := strconv.ParseUint(arg, 10, bgpCommunityMaxSize) if err == nil { return nil } _regexpCommunity := regexp.MustCompile(`(\d+):(\d+)`) elems := _regexpCommunity.FindStringSubmatch(arg) if len(elems) == 3 { if _, err := strconv.ParseUint(elems[1], 10, bgpCommunityMaxPartSize); err == nil { if _, err = strconv.ParseUint(elems[2], 10, bgpCommunityMaxPartSize); err == nil { return nil } } } for _, v := range bgp.WellKnownCommunityNameMap { if arg == v { return nil } } return fmt.Errorf("failed to parse %s as community", arg) } // parseBGPNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it // can't parse a next hop IP from the GoBGP Path, it returns an error. func parseBGPNextHop(path *gobgpapi.Path) (net.IP, error) { for _, pAttr := range path.GetPattrs() { unmarshalNew, err := pAttr.UnmarshalNew() if err != nil { return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err) } switch t := unmarshalNew.(type) { case *gobgpapi.NextHopAttribute: // This is the primary way that we receive NextHops and happens when both the client and the server exchange // next hops on the same IP family that they negotiated BGP on nextHopIP := net.ParseIP(t.NextHop) if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { return nextHopIP, nil } return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop) case *gobgpapi.MpReachNLRIAttribute: // in the case where the server and the client are exchanging next-hops that don't relate to their primary // IP family, we get MpReachNLRIAttribute instead of NextHopAttributes // TODO: here we only take the first next hop, at some point in the future it would probably be best to // consider multiple next hops nextHopIP := net.ParseIP(t.NextHops[0]) if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { return nextHopIP, nil } return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0]) } } return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path) } // parseBGPPath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes. // If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it // can't parse the destination or the next hop IP, it returns an error. func parseBGPPath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { nextHop, err := parseBGPNextHop(path) if err != nil { return nil, nil, err } nlri := path.GetNlri() var prefix gobgpapi.IPAddressPrefix err = nlri.UnmarshalTo(&prefix) if err != nil { return nil, nil, fmt.Errorf("invalid nlri in advertised path") } dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen)) if err != nil { return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path") } return dstSubnet, nextHop, nil } // deleteRoutesByDestination attempts to safely find all routes based upon its destination subnet and delete them func deleteRoutesByDestination(destinationSubnet *net.IPNet) error { routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{ Dst: destinationSubnet, Protocol: zebraRouteOriginator, }, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL) if err != nil { return fmt.Errorf("failed to get routes from netlink: %v", err) } for i, r := range routes { klog.V(2).Infof("Found route to remove: %s", r.String()) if err = netlink.RouteDel(&routes[i]); err != nil { return fmt.Errorf("failed to remove route due to %v", err) } } return nil } // getPodCIDRsFromAllNodeSources gets the pod CIDRs for all available sources on a given node in a specific order. The // order of preference is: // 1. From the kube-router.io/pod-cidr annotation (preserves backwards compatibility) // 2. From the kube-router.io/pod-cidrs annotation (allows the user to specify multiple CIDRs for a given node which // seems to be closer aligned to how upstream is moving) // 3. From the node's spec definition in node.Spec.PodCIDRs func getPodCIDRsFromAllNodeSources(node *v1core.Node) (podCIDRs []string) { // Prefer kube-router.io/pod-cidr as a matter of keeping backwards compatibility with previous functionality podCIDR := node.GetAnnotations()["kube-router.io/pod-cidr"] if podCIDR != "" { _, _, err := net.ParseCIDR(podCIDR) if err != nil { klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidr annotation, skipping...", podCIDR) } else { podCIDRs = append(podCIDRs, podCIDR) return podCIDRs } } // Then attempt to find the annotation kube-router.io/pod-cidrs and prefer those second cidrsAnnotation := node.GetAnnotations()["kube-router.io/pod-cidrs"] if cidrsAnnotation != "" { // this should contain comma separated CIDRs, any CIDRs which fail to parse we will emit a warning log for // and skip it cidrsAnnotArray := strings.Split(cidrsAnnotation, ",") for _, cidr := range cidrsAnnotArray { _, _, err := net.ParseCIDR(cidr) if err != nil { klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidrs annotation, skipping...", cidr) continue } podCIDRs = append(podCIDRs, cidr) } return podCIDRs } // Finally, if all else fails, use the PodCIDRs on the node spec return node.Spec.PodCIDRs } // getBGPRouteInfoForVIP attempt to automatically find the subnet, BGP AFI/SAFI Family, and nexthop for a given VIP // based upon whether it is an IPv4 address or an IPv6 address. Returns slash notation subnet as uint32 suitable for // sending to GoBGP and an error if it is unable to determine the subnet automatically func (nrc *NetworkRoutingController) getBGPRouteInfoForVIP(vip string) (subnet uint32, nh string, afiFamily gobgpapi.Family_Afi, err error) { ip := net.ParseIP(vip) if ip == nil { err = fmt.Errorf("could not parse VIP: %s", vip) return } if ip.To4() != nil { subnet = 32 afiFamily = gobgpapi.Family_AFI_IP nhIP := utils.FindBestIPv4NodeAddress(nrc.primaryIP, nrc.nodeIPv4Addrs) if nhIP == nil { err = fmt.Errorf("could not find an IPv4 address on node to set as nexthop for vip: %s", vip) } nh = nhIP.String() return } if ip.To16() != nil { subnet = 128 afiFamily = gobgpapi.Family_AFI_IP6 nhIP := utils.FindBestIPv6NodeAddress(nrc.primaryIP, nrc.nodeIPv6Addrs) if nhIP == nil { err = fmt.Errorf("could not find an IPv6 address on node to set as nexthop for vip: %s", vip) } nh = nhIP.String() return } err = fmt.Errorf("could not convert IP to IPv4 or IPv6, unable to find subnet for: %s", vip) return } // fouPortAndProtoExist checks to see if the given FoU port is already configured on the system via iproute2 // tooling for the given protocol // // fou show, shows both IPv4 and IPv6 ports in the same show command, they look like: // port 5556 gue // port 5556 gue -6 // where the only thing that distinguishes them is the -6 or not on the end // WARNING we're parsing a CLI tool here not an API, this may break at some point in the future func fouPortAndProtoExist(port uint16, isIPv6 bool) bool { const ipRoute2IPv6Prefix = "-6" strPort := strconv.FormatInt(int64(port), 10) fouArgs := make([]string, 0) klog.V(2).Infof("Checking FOU Port and Proto... %s - %t", strPort, isIPv6) if isIPv6 { fouArgs = append(fouArgs, ipRoute2IPv6Prefix) } fouArgs = append(fouArgs, "fou", "show") out, err := exec.Command("ip", fouArgs...).CombinedOutput() // iproute2 returns an error if no fou configuration exists if err != nil { return false } strOut := string(out) klog.V(2).Infof("Combined output of ip fou show: %s", strOut) scanner := bufio.NewScanner(strings.NewReader(strOut)) // loop over all lines of output for scanner.Scan() { scannedLine := scanner.Text() // if the output doesn't contain our port at all, then continue if !strings.Contains(scannedLine, strPort) { continue } // if this is IPv6 port and it has the correct IPv6 suffix (see example above) then return true if isIPv6 && strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { return true } // if this is not IPv6 and it does not have an IPv6 suffix (see example above) then return true if !isIPv6 && !strings.HasSuffix(scannedLine, ipRoute2IPv6Prefix) { return true } } return false } // linkFOUEnabled checks to see whether the given link has FoU (Foo over Ethernet) enabled on it, specifically since // kube-router only works with GUE (Generic UDP Encapsulation) we look for that and not just FoU in general. If the // linkName is enabled with FoU GUE then we return true, otherwise false // // Output for a FoU Enabled GUE tunnel looks like: // ipip ipip remote local dev ttl 225 pmtudisc encap gue encap-sport auto encap-dport 5555 ... // Output for a normal IPIP tunnel looks like: // ipip ipip remote local dev ttl inherit ... func linkFOUEnabled(linkName string) bool { const fouEncapEnabled = "encap gue" cmdArgs := []string{"-details", "link", "show", linkName} out, err := exec.Command("ip", cmdArgs...).CombinedOutput() if err != nil { klog.Warning("recevied an error while trying to look at the link details of %s, this shouldn't have happened", linkName) return false } if strings.Contains(string(out), fouEncapEnabled) { return true } return false }