kube-router/pkg/controllers/netpol/network_policy_controller.go
package netpol
import (
"bytes"
"crypto/sha256"
"encoding/base32"
"fmt"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck"
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
"github.com/cloudnativelabs/kube-router/v2/pkg/options"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
"github.com/coreos/go-iptables/iptables"
"k8s.io/klog/v2"
v1core "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
netutils "k8s.io/utils/net"
)
const (
kubePodFirewallChainPrefix = "KUBE-POD-FW-"
kubeNetworkPolicyChainPrefix = "KUBE-NWPLCY-"
kubeSourceIPSetPrefix = "KUBE-SRC-"
kubeDestinationIPSetPrefix = "KUBE-DST-"
kubeInputChainName = "KUBE-ROUTER-INPUT"
kubeForwardChainName = "KUBE-ROUTER-FORWARD"
kubeOutputChainName = "KUBE-ROUTER-OUTPUT"
kubeDefaultNetpolChain = "KUBE-NWPLCY-DEFAULT"
kubeIngressPolicyType = "ingress"
kubeEgressPolicyType = "egress"
kubeBothPolicyType = "both"
syncVersionBase = 10
)
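// The first two prefixes name iptables chains (per-pod firewall chains and per-network-policy chains); KUBE-SRC-
// and KUBE-DST- name the ipsets holding the source and destination pod IPs for a policy. The full names are the
// prefix plus a suffix generated by the sync code elsewhere in this package (see syncNetworkPolicyChains and
// syncPodFirewallChains). syncVersionBase is the numeric base (10) that fullPolicySync uses to render the sync
// timestamp into a version string.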
var (
defaultChains = map[string]string{
"INPUT": kubeInputChainName,
"FORWARD": kubeForwardChainName,
"OUTPUT": kubeOutputChainName,
}
)
// The network policy controller provides both ingress and egress filtering for pods according to the defined
// network policies. Two different types of iptables chains are used. Each pod running on the node that requires
// either ingress or egress filtering gets a pod-specific chain. Each network policy has an iptables chain whose
// rules are expressed through ipsets matching source and destination pod IPs. In the FORWARD chain of the
// filter table, a rule is added to jump traffic originating from the pod (in the case of an egress network policy)
// or destined to the pod (in the case of an ingress network policy) to the pod-specific iptables chain. Each
// pod-specific iptables chain has rules to jump to the chains of the network policies that the pod matches. So a
// packet originating from or destined to a pod traverses the filter table's FORWARD chain, then the pod-specific
// chain, then one or more network policy chains, until a rule accepts the packet or it is dropped by the rule in
// the pod chain when there is no match.
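// An illustrative traversal for a pod selected by at least one policy (the hashed chain-name suffixes below are
// examples only, not values produced in this file):
//	FORWARD -> KUBE-ROUTER-FORWARD -> KUBE-POD-FW-<hash> -> KUBE-NWPLCY-<hash> (one per matching policy)
// The packet is accepted at the first matching policy rule, or dropped by the rule in the pod-specific chain if
// nothing matches.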
// NetworkPolicyController holds the state required by the network policy controller
type NetworkPolicyController struct {
nodeHostName string
serviceClusterIPRanges []net.IPNet
serviceExternalIPRanges []net.IPNet
serviceLoadBalancerIPRanges []net.IPNet
serviceNodePortRange string
mu sync.Mutex
syncPeriod time.Duration
MetricsEnabled bool
healthChan chan<- *healthcheck.ControllerHeartbeat
fullSyncRequestChan chan struct{}
ipsetMutex *sync.Mutex
iptablesCmdHandlers map[v1core.IPFamily]utils.IPTablesHandler
iptablesSaveRestore map[v1core.IPFamily]utils.IPTablesSaveRestorer
filterTableRules map[v1core.IPFamily]*bytes.Buffer
ipSetHandlers map[v1core.IPFamily]utils.IPSetHandler
nodeIPs map[v1core.IPFamily]net.IP
podLister cache.Indexer
npLister cache.Indexer
nsLister cache.Indexer
PodEventHandler cache.ResourceEventHandler
NamespaceEventHandler cache.ResourceEventHandler
NetworkPolicyEventHandler cache.ResourceEventHandler
}
// internal structure to represent a network policy
type networkPolicyInfo struct {
name string
namespace string
podSelector labels.Selector
// set of pods matching the network policy spec's podSelector label selector
targetPods map[string]podInfo
// whitelist ingress rules from the network policy spec
ingressRules []ingressRule
// whitelist egress rules from the network policy spec
egressRules []egressRule
// policy type "ingress" or "egress" or "both" as defined by PolicyType in the spec
policyType string
}
// internal structure to represent Pod
type podInfo struct {
ip string
ips []v1core.PodIP
name string
namespace string
labels map[string]string
}
// internal structure to represent NetworkPolicyIngressRule in the spec
type ingressRule struct {
matchAllPorts bool
ports []protocolAndPort
namedPorts []endPoints
matchAllSource bool
srcPods []podInfo
srcIPBlocks map[v1core.IPFamily][][]string
}
// internal structure to represent NetworkPolicyEgressRule in the spec
type egressRule struct {
matchAllPorts bool
ports []protocolAndPort
namedPorts []endPoints
matchAllDestinations bool
dstPods []podInfo
dstIPBlocks map[v1core.IPFamily][][]string
}
type protocolAndPort struct {
protocol string
port string
endport string
}
type endPoints struct {
ips map[v1core.IPFamily][]string
protocolAndPort
}
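// The aliases below form a nested lookup of named port -> protocol -> numeric port -> endpoints, e.g.
// (illustrative values only):
//	namedPort2eps["http"]["TCP"]["8080"] = &endPoints{
//		ips:             map[v1core.IPFamily][]string{v1core.IPv4Protocol: {"10.1.0.5"}},
//		protocolAndPort: protocolAndPort{protocol: "TCP", port: "8080"},
//	}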
type numericPort2eps map[string]*endPoints
type protocol2eps map[string]numericPort2eps
type namedPort2eps map[string]protocol2eps
// Run runs forever until we receive a notification on stopCh
func (npc *NetworkPolicyController) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{},
wg *sync.WaitGroup) {
t := time.NewTicker(npc.syncPeriod)
defer t.Stop()
defer wg.Done()
klog.Info("Starting network policy controller")
npc.healthChan = healthChan
// setup kube-router specific top level custom chains (KUBE-ROUTER-INPUT, KUBE-ROUTER-FORWARD, KUBE-ROUTER-OUTPUT)
npc.ensureTopLevelChains()
// setup default network policy chain that is applied to traffic from/to the pods that does not match any network
// policy
npc.ensureDefaultNetworkPolicyChain()
// Full syncs of the network policy controller take a lot of time and can only be processed one at a time,
// therefore we run the sync in its own goroutine and request syncs through a single-item channel
klog.Info("Starting network policy controller full sync goroutine")
wg.Add(1)
go func(fullSyncRequest <-chan struct{}, stopCh <-chan struct{}, wg *sync.WaitGroup) {
defer wg.Done()
for {
// Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first
select {
case <-stopCh:
klog.Info("Shutting down network policies full sync goroutine")
return
default:
}
select {
case <-stopCh:
klog.Info("Shutting down network policies full sync goroutine")
return
case <-fullSyncRequest:
klog.V(3).Info("Received request for a full sync, processing")
npc.fullPolicySync() // fullPolicySync() is a blocking request here
}
}
}(npc.fullSyncRequestChan, stopCh, wg)
// loop forever till notified to stop on stopCh
for {
klog.V(1).Info("Requesting periodic sync of iptables to reflect network policies")
npc.RequestFullSync()
select {
case <-stopCh:
klog.Infof("Shutting down network policies controller")
return
case <-t.C:
}
}
}
// RequestFullSync allows requesting a full network policy sync without blocking the caller
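// Because fullSyncRequestChan is a single-item buffered channel, concurrent requests coalesce: if a sync request
// is already queued, additional requests are simply dropped. The pod, namespace, and network policy event
// handlers wired up in NewNetworkPolicyController are the expected callers.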
func (npc *NetworkPolicyController) RequestFullSync() {
select {
case npc.fullSyncRequestChan <- struct{}{}:
klog.V(3).Info("Full sync request queue was empty so a full sync request was successfully sent")
default: // Don't block if the buffered channel is full, return quickly so that we don't block the caller's execution
klog.V(1).Info("Full sync request queue was full, skipping...")
}
}
// fullPolicySync synchronizes iptables with the desired state of the network policies
func (npc *NetworkPolicyController) fullPolicySync() {
var err error
var networkPoliciesInfo []networkPolicyInfo
npc.mu.Lock()
defer npc.mu.Unlock()
healthcheck.SendHeartBeat(npc.healthChan, "NPC")
start := time.Now()
syncVersion := strconv.FormatInt(start.UnixNano(), syncVersionBase)
defer func() {
endTime := time.Since(start)
if npc.MetricsEnabled {
metrics.ControllerIptablesSyncTime.Observe(endTime.Seconds())
}
klog.V(1).Infof("sync iptables took %v", endTime)
}()
klog.V(1).Infof("Starting sync of iptables with version: %s", syncVersion)
// ensure kube-router specific top level chains and corresponding rules exist
npc.ensureTopLevelChains()
// ensure default network policy chain that is applied to traffic from/to the pods that does not match any network
// policy
npc.ensureDefaultNetworkPolicyChain()
networkPoliciesInfo, err = npc.buildNetworkPoliciesInfo()
if err != nil {
klog.Errorf("Aborting sync. Failed to build network policies: %v", err.Error())
return
}
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
npc.filterTableRules[ipFamily].Reset()
saveStart := time.Now()
err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily])
saveEndTime := time.Since(saveStart)
if npc.MetricsEnabled {
switch ipFamily {
case v1core.IPv4Protocol:
metrics.ControllerIptablesV4SaveTime.Observe(saveEndTime.Seconds())
case v1core.IPv6Protocol:
metrics.ControllerIptablesV6SaveTime.Observe(saveEndTime.Seconds())
}
}
klog.V(1).Infof("Saving %v iptables rules took %v", ipFamily, saveEndTime)
if err != nil {
klog.Errorf("Aborting sync. Failed to run iptables-save: %v", err.Error())
return
}
}
activePolicyChains, activePolicyIPSets, err := npc.syncNetworkPolicyChains(networkPoliciesInfo, syncVersion)
if err != nil {
klog.Errorf("Aborting sync. Failed to sync network policy chains: %v", err.Error())
return
}
activePodFwChains := npc.syncPodFirewallChains(networkPoliciesInfo, syncVersion)
// Makes sure that the ACCEPT rules for packets marked with "0x20000" are added to the end of each of kube-router's
// top level chains
npc.ensureExplicitAccept()
err = npc.cleanupStaleRules(activePolicyChains, activePodFwChains, false)
if err != nil {
klog.Errorf("Aborting sync. Failed to cleanup stale iptables rules: %v", err.Error())
return
}
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
ipFamily := ipFamily
restoreStart := time.Now()
err := iptablesSaveRestore.Restore("filter", npc.filterTableRules[ipFamily].Bytes())
restoreEndTime := time.Since(restoreStart)
if npc.MetricsEnabled {
switch ipFamily {
case v1core.IPv4Protocol:
metrics.ControllerIptablesV4RestoreTime.Observe(restoreEndTime.Seconds())
case v1core.IPv6Protocol:
metrics.ControllerIptablesV6RestoreTime.Observe(restoreEndTime.Seconds())
}
}
klog.V(1).Infof("Restoring %v iptables rules took %v", ipFamily, restoreEndTime)
if err != nil {
klog.Errorf("Aborting sync. Failed to run iptables-restore: %v\n%s",
err.Error(), npc.filterTableRules[ipFamily].String())
return
}
}
err = npc.cleanupStaleIPSets(activePolicyIPSets)
if err != nil {
klog.Errorf("Failed to cleanup stale ipsets: %v", err.Error())
return
}
}
func (npc *NetworkPolicyController) iptablesCmdHandlerForCIDR(cidr *net.IPNet) (utils.IPTablesHandler, error) {
if netutils.IsIPv4CIDR(cidr) {
return npc.iptablesCmdHandlers[v1core.IPv4Protocol], nil
}
if netutils.IsIPv6CIDR(cidr) {
return npc.iptablesCmdHandlers[v1core.IPv6Protocol], nil
}
return nil, fmt.Errorf("invalid CIDR")
}
func (npc *NetworkPolicyController) allowTrafficToClusterIPRange(
serviceVIPPosition int,
serviceClusterIPRange *net.IPNet,
addUUIDForRuleSpec func(chain string, ruleSpec *[]string) (string, error),
ensureRuleAtPosition func(iptablesCmdHandler utils.IPTablesHandler,
chain string, ruleSpec []string, uuid string, position int),
comment string) {
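// For a cluster IP range of e.g. 10.96.0.0/12 (an illustrative value, not read from this file), the rule placed
// into KUBE-ROUTER-INPUT ends up looking roughly like:
//	-m comment --comment "allow traffic to primary/secondary cluster IP range - <16-char id>" -d 10.96.0.0/12 -j RETURN
// where the identifier suffix is appended to the comment by the addUUIDForRuleSpec helper passed in from
// ensureTopLevelChains.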
whitelistServiceVips := []string{"-m", "comment", "--comment", comment,
"-d", serviceClusterIPRange.String(), "-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
iptablesCmdHandler, err := npc.iptablesCmdHandlerForCIDR(serviceClusterIPRange)
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
ensureRuleAtPosition(iptablesCmdHandler,
kubeInputChainName, whitelistServiceVips, uuid, serviceVIPPosition)
}
// ensureTopLevelChains creates the custom chains KUBE-ROUTER-INPUT, KUBE-ROUTER-FORWARD, and KUBE-ROUTER-OUTPUT
// and the following rules in the filter table to jump from the builtin chains to the custom chains:
// -A INPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-INPUT
// -A FORWARD -m comment --comment "kube-router netpol" -j KUBE-ROUTER-FORWARD
// -A OUTPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-OUTPUT
func (npc *NetworkPolicyController) ensureTopLevelChains() {
const serviceVIPPosition = 1
rulePosition := map[v1core.IPFamily]int{v1core.IPv4Protocol: 1, v1core.IPv6Protocol: 1}
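// addUUIDForRuleSpec derives a stable identifier for a rule by hashing the chain name plus the rule spec with
// sha256, base32-encoding the digest, and keeping the first 16 characters; the identifier is appended to the
// rule's "--comment" value (e.g. "kube-router netpol" becomes "kube-router netpol - <16-char id>") so that
// ensureRuleAtPosition can later find the rule when checking its position.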
addUUIDForRuleSpec := func(chain string, ruleSpec *[]string) (string, error) {
hash := sha256.Sum256([]byte(chain + strings.Join(*ruleSpec, "")))
encoded := base32.StdEncoding.EncodeToString(hash[:])[:16]
for idx, part := range *ruleSpec {
if part == "--comment" {
(*ruleSpec)[idx+1] = (*ruleSpec)[idx+1] + " - " + encoded
return encoded, nil
}
}
return "", fmt.Errorf("could not find a comment in the ruleSpec string given: %s",
strings.Join(*ruleSpec, " "))
}
ensureRuleAtPosition := func(
iptablesCmdHandler utils.IPTablesHandler, chain string, ruleSpec []string, uuid string, position int) {
exists, err := iptablesCmdHandler.Exists("filter", chain, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to verify rule exists in %s chain due to %s", chain, err.Error())
}
if !exists {
klog.V(2).Infof("Rule '%s' doesn't exist in chain %s, inserting at position %d",
strings.Join(ruleSpec, " "), chain, position)
err := iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error())
}
return
}
rules, err := iptablesCmdHandler.List("filter", chain)
if err != nil {
klog.Fatalf("failed to list rules in filter table %s chain due to %s", chain, err.Error())
}
var ruleNo, ruleIndexOffset int
for i, rule := range rules {
rule = strings.Replace(rule, "\"", "", 2) // removes quote from comment string
if strings.HasPrefix(rule, "-P") || strings.HasPrefix(rule, "-N") {
// if this chain has a default policy, then it will show as rule #1 from iptablesCmdHandler.List so we
// need to account for this offset
ruleIndexOffset++
continue
}
if strings.Contains(rule, uuid) {
// range uses a 0 index, but iptables uses a 1 index so we need to increase ruleNo by 1
ruleNo = i + 1 - ruleIndexOffset
break
}
}
if ruleNo != position {
klog.V(2).Infof("Rule '%s' existed in chain %s, but was in position %d instead of %d, "+
"moving...", strings.Join(ruleSpec, " "), chain, ruleNo, position)
err = iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error())
}
err = iptablesCmdHandler.Delete("filter", chain, strconv.Itoa(ruleNo+1))
if err != nil {
klog.Fatalf("Failed to delete incorrect rule in %s chain due to %s", chain, err.Error())
}
}
}
for _, handler := range npc.iptablesCmdHandlers {
for builtinChain, customChain := range defaultChains {
exists, err := handler.ChainExists("filter", customChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s", customChain,
err.Error())
}
if !exists {
klog.V(2).Infof("Custom chain was missing, creating: %s in filter table", customChain)
err = handler.NewChain("filter", customChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s", customChain,
err.Error())
}
}
args := []string{"-m", "comment", "--comment", "kube-router netpol", "-j", customChain}
uuid, err := addUUIDForRuleSpec(builtinChain, &args)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Ensuring jump to chain %s from %s at position %d", customChain, builtinChain,
serviceVIPPosition)
ensureRuleAtPosition(handler, builtinChain, args, uuid, serviceVIPPosition)
}
}
if len(npc.serviceClusterIPRanges) > 0 {
for idx, serviceRange := range npc.serviceClusterIPRanges {
var family v1core.IPFamily
if serviceRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
klog.V(2).Infof("Allow traffic to ingress towards Cluster IP Range: %s for family: %s",
serviceRange.String(), family)
npc.allowTrafficToClusterIPRange(rulePosition[family], &npc.serviceClusterIPRanges[idx],
addUUIDForRuleSpec, ensureRuleAtPosition, "allow traffic to primary/secondary cluster IP range")
rulePosition[family]++
}
} else {
klog.Fatalf("Primary service cluster IP range is not configured")
}
for family, handler := range npc.iptablesCmdHandlers {
whitelistTCPNodeports := []string{"-p", "tcp", "-m", "comment", "--comment",
"allow LOCAL TCP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL",
"-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistTCPNodeports)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Allow TCP traffic to ingress towards node port range: %s for family: %s",
npc.serviceNodePortRange, family)
ensureRuleAtPosition(handler,
kubeInputChainName, whitelistTCPNodeports, uuid, rulePosition[family])
rulePosition[family]++
whitelistUDPNodeports := []string{"-p", "udp", "-m", "comment", "--comment",
"allow LOCAL UDP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL",
"-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"}
uuid, err = addUUIDForRuleSpec(kubeInputChainName, &whitelistUDPNodeports)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Allow UDP traffic to ingress towards node port range: %s for family: %s",
npc.serviceNodePortRange, family)
ensureRuleAtPosition(handler,
kubeInputChainName, whitelistUDPNodeports, uuid, rulePosition[family])
rulePosition[family]++
}
for idx, externalIPRange := range npc.serviceExternalIPRanges {
var family v1core.IPFamily
if externalIPRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
whitelistServiceVips := []string{"-m", "comment", "--comment",
"allow traffic to external IP range: " + externalIPRange.String(), "-d", externalIPRange.String(),
"-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
// Access externalIPRange via index to avoid implicit memory aliasing
cidrHandler, err := npc.iptablesCmdHandlerForCIDR(&npc.serviceExternalIPRanges[idx])
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
klog.V(2).Infof("Allow traffic to ingress towards External IP Range: %s for family: %s",
externalIPRange.String(), family)
ensureRuleAtPosition(cidrHandler,
kubeInputChainName, whitelistServiceVips, uuid, rulePosition[family])
rulePosition[family]++
}
for idx, loadBalancerIPRange := range npc.serviceLoadBalancerIPRanges {
var family v1core.IPFamily
if loadBalancerIPRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
whitelistServiceVips := []string{"-m", "comment", "--comment",
"allow traffic to load balancer IP range: " + loadBalancerIPRange.String(), "-d", loadBalancerIPRange.String(),
"-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
// Access loadBalancerIPRange via index to avoid implicit memory aliasing
cidrHandler, err := npc.iptablesCmdHandlerForCIDR(&npc.serviceLoadBalancerIPRanges[idx])
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
klog.V(2).Infof("Allow traffic to ingress towards Load Balancer IP Range: %s for family: %s",
loadBalancerIPRange.String(), family)
ensureRuleAtPosition(cidrHandler,
kubeInputChainName, whitelistServiceVips, uuid, rulePosition[family])
rulePosition[family]++
}
}
func (npc *NetworkPolicyController) ensureExplicitAccept() {
// For traffic to/from the local pods, let the network policy controller be the
// authoritative entity that ACCEPTs the traffic if it complies with the network policies
for _, filterTableRules := range npc.filterTableRules {
for _, chain := range defaultChains {
comment := "\"rule to explicitly ACCEPT traffic that comply to network policies\""
args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "--mark", "0x20000/0x20000",
"-j", "ACCEPT"}
utils.AppendUnique(filterTableRules, chain, args)
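// The appended rule renders in the iptables-save buffer roughly as:
//	-A KUBE-ROUTER-INPUT -m comment --comment "rule to explicitly ACCEPT traffic that comply to network policies" -m mark --mark 0x20000/0x20000 -j ACCEPT
// (and likewise for KUBE-ROUTER-FORWARD and KUBE-ROUTER-OUTPUT). The 0x20000/0x20000 mark itself is expected to
// be set by the policy/pod firewall rules generated elsewhere in this package once traffic has passed policy
// checks.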
}
}
}
// ensureDefaultNetworkPolicyChain creates the custom chain KUBE-NWPLCY-DEFAULT
func (npc *NetworkPolicyController) ensureDefaultNetworkPolicyChain() {
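// The chain ends up containing a single rule roughly equivalent to:
//	-A KUBE-NWPLCY-DEFAULT -j MARK -m comment --comment "rule to mark traffic matching a network policy" --set-xmark 0x10000/0x10000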
for _, iptablesCmdHandler := range npc.iptablesCmdHandlers {
markArgs := make([]string, 0)
markComment := "rule to mark traffic matching a network policy"
markArgs = append(markArgs, "-j", "MARK", "-m", "comment", "--comment", markComment,
"--set-xmark", "0x10000/0x10000")
exists, err := iptablesCmdHandler.ChainExists("filter", kubeDefaultNetpolChain)
if err != nil {
klog.Fatalf("failed to check for the existence of chain %s, error: %v", kubeDefaultNetpolChain, err)
}
if !exists {
err = iptablesCmdHandler.NewChain("filter", kubeDefaultNetpolChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s",
kubeDefaultNetpolChain, err.Error())
}
}
err = iptablesCmdHandler.AppendUnique("filter", kubeDefaultNetpolChain, markArgs...)
if err != nil {
klog.Fatalf("Failed to run iptables command: %s", err.Error())
}
}
}
func (npc *NetworkPolicyController) cleanupStaleRules(activePolicyChains, activePodFwChains map[string]bool,
deleteDefaultChains bool) error {
cleanupPodFwChains := make([]string, 0)
cleanupPolicyChains := make([]string, 0)
for ipFamily, iptablesCmdHandler := range npc.iptablesCmdHandlers {
// find iptables chains that are no longer used by comparing the current chains to the active maps we were passed
chains, err := iptablesCmdHandler.ListChains("filter")
if err != nil {
return fmt.Errorf("unable to list chains: %w", err)
}
for _, chain := range chains {
if strings.HasPrefix(chain, kubeNetworkPolicyChainPrefix) {
if chain == kubeDefaultNetpolChain {
continue
}
if _, ok := activePolicyChains[chain]; !ok {
cleanupPolicyChains = append(cleanupPolicyChains, chain)
continue
}
}
if strings.HasPrefix(chain, kubePodFirewallChainPrefix) {
if _, ok := activePodFwChains[chain]; !ok {
cleanupPodFwChains = append(cleanupPodFwChains, chain)
continue
}
}
}
var newChains, newRules, desiredFilterTable bytes.Buffer
rules := strings.Split(npc.filterTableRules[ipFamily].String(), "\n")
if len(rules) > 0 && rules[len(rules)-1] == "" {
rules = rules[:len(rules)-1]
}
for _, rule := range rules {
skipRule := false
for _, podFWChainName := range cleanupPodFwChains {
if strings.Contains(rule, podFWChainName) {
skipRule = true
break
}
}
for _, policyChainName := range cleanupPolicyChains {
if strings.Contains(rule, policyChainName) {
skipRule = true
break
}
}
if deleteDefaultChains {
for _, chain := range []string{kubeInputChainName, kubeForwardChainName, kubeOutputChainName,
kubeDefaultNetpolChain} {
if strings.Contains(rule, chain) {
skipRule = true
break
}
}
}
if strings.Contains(rule, "COMMIT") || strings.HasPrefix(rule, "# ") {
skipRule = true
}
if skipRule {
continue
}
if strings.HasPrefix(rule, ":") {
newChains.WriteString(rule + " - [0:0]\n")
}
if strings.HasPrefix(rule, "-") {
newRules.WriteString(rule + "\n")
}
}
desiredFilterTable.WriteString("*filter" + "\n")
desiredFilterTable.Write(newChains.Bytes())
desiredFilterTable.Write(newRules.Bytes())
desiredFilterTable.WriteString("COMMIT" + "\n")
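// At this point desiredFilterTable holds a standard iptables-restore payload for the filter table, roughly
// (illustrative content only):
//	*filter
//	:KUBE-ROUTER-INPUT - [0:0]
//	-A KUBE-ROUTER-INPUT -m comment --comment "..." -j RETURN
//	COMMIT
// with any chain declaration or rule referencing a stale KUBE-POD-FW-/KUBE-NWPLCY- chain filtered out above.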
npc.filterTableRules[ipFamily] = &desiredFilterTable
}
return nil
}
func (npc *NetworkPolicyController) cleanupStaleIPSets(activePolicyIPSets map[string]bool) error {
// Certain operations, such as those performed by Cleanup(), don't work with a full instantiation of the controller;
// in those cases the mutex may not be present, and it doesn't need to be, because they operate out of a single
// goroutine where there is no need for locking
if nil != npc.ipsetMutex {
klog.V(1).Infof("Attempting to attain ipset mutex lock")
npc.ipsetMutex.Lock()
klog.V(1).Infof("Attained ipset mutex lock, continuing...")
defer func() {
npc.ipsetMutex.Unlock()
klog.V(1).Infof("Returned ipset mutex lock")
}()
}
for _, ipsets := range npc.ipSetHandlers {
cleanupPolicyIPSets := make([]*utils.Set, 0)
if err := ipsets.Save(); err != nil {
klog.Fatalf("failed to initialize ipsets command executor due to %s", err.Error())
}
for _, set := range ipsets.Sets() {
if strings.HasPrefix(set.Name, kubeSourceIPSetPrefix) ||
strings.HasPrefix(set.Name, kubeDestinationIPSetPrefix) {
if _, ok := activePolicyIPSets[set.Name]; !ok {
cleanupPolicyIPSets = append(cleanupPolicyIPSets, set)
}
}
}
// cleanup network policy ipsets
for _, set := range cleanupPolicyIPSets {
if err := set.Destroy(); err != nil {
return fmt.Errorf("failed to delete ipset %s due to %s", set.Name, err)
}
}
}
return nil
}
// Cleanup cleans up all of the iptables and ipset configuration applied by the NetworkPolicyController
func (npc *NetworkPolicyController) Cleanup() {
klog.Info("Cleaning up NetworkPolicyController configurations...")
var emptySet map[string]bool
// Take a dump (iptables-save) of the current filter table for cleanupStaleRules() to work on
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
if err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily]); err != nil {
klog.Errorf("error encountered attempting to list iptables rules for cleanup: %v", err)
return
}
}
// Run cleanupStaleRules() to get rid of most of the kube-router rules (this is the same logic that runs as
// part of NPC's runtime loop). Setting the last parameter to true causes even the default chains to be removed.
err := npc.cleanupStaleRules(emptySet, emptySet, true)
if err != nil {
klog.Errorf("error encountered attempting to cleanup iptables rules: %v", err)
return
}
// Restore (iptables-restore) npc's cleaned up version of the iptables filter chain
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
if err = iptablesSaveRestore.Restore("filter", npc.filterTableRules[ipFamily].Bytes()); err != nil {
klog.Errorf(
"error encountered while running iptables-restore: %v\n%s", err,
npc.filterTableRules[ipFamily].String())
}
}
// Cleanup ipsets
err = npc.cleanupStaleIPSets(emptySet)
if err != nil {
klog.Errorf("error encountered while cleaning ipsets: %v", err)
return
}
klog.Infof("Successfully cleaned up the NetworkPolicyController configurations applied by kube-router")
}
func NewIPTablesHandlers(config *options.KubeRouterConfig) (
map[v1core.IPFamily]utils.IPTablesHandler, map[v1core.IPFamily]utils.IPSetHandler, error) {
iptablesCmdHandlers := make(map[v1core.IPFamily]utils.IPTablesHandler, 2)
ipSetHandlers := make(map[v1core.IPFamily]utils.IPSetHandler, 2)
if config.EnableIPv4 {
iptHandler, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
if err != nil {
return nil, nil, fmt.Errorf("failed to create iptables handler: %w", err)
}
iptablesCmdHandlers[v1core.IPv4Protocol] = iptHandler
ipset, err := utils.NewIPSet(false)
if err != nil {
return nil, nil, fmt.Errorf("failed to create ipset handler: %w", err)
}
ipSetHandlers[v1core.IPv4Protocol] = ipset
}
if config.EnableIPv6 {
iptHandler, err := iptables.NewWithProtocol(iptables.ProtocolIPv6)
if err != nil {
return nil, nil, fmt.Errorf("failed to create iptables handler: %w", err)
}
iptablesCmdHandlers[v1core.IPv6Protocol] = iptHandler
ipset, err := utils.NewIPSet(true)
if err != nil {
return nil, nil, fmt.Errorf("failed to create ipset handler: %w", err)
}
ipSetHandlers[v1core.IPv6Protocol] = ipset
}
return iptablesCmdHandlers, ipSetHandlers, nil
}
// NewNetworkPolicyController returns a new NetworkPolicyController object
func NewNetworkPolicyController(clientset kubernetes.Interface,
config *options.KubeRouterConfig, podInformer cache.SharedIndexInformer,
npInformer cache.SharedIndexInformer, nsInformer cache.SharedIndexInformer,
ipsetMutex *sync.Mutex,
iptablesCmdHandlers map[v1core.IPFamily]utils.IPTablesHandler,
ipSetHandlers map[v1core.IPFamily]utils.IPSetHandler) (*NetworkPolicyController, error) {
npc := NetworkPolicyController{ipsetMutex: ipsetMutex}
// Create a single-item buffered channel to ensure that we only keep a single full sync request at a time.
// Additional requests would be pointless to queue since, after the first one is processed, the system is already
// up to date with all of the policy changes that any subsequently enqueued request would cover
npc.fullSyncRequestChan = make(chan struct{}, 1)
// Validate and parse ClusterIP service range
if len(config.ClusterIPCIDRs) == 0 {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter, the list is empty")
}
_, primaryIpnet, err := net.ParseCIDR(strings.TrimSpace(config.ClusterIPCIDRs[0]))
if err != nil {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter: %w", err)
}
npc.serviceClusterIPRanges = append(npc.serviceClusterIPRanges, *primaryIpnet)
// Validate that ClusterIP service range type matches the configuration
if config.EnableIPv4 && !config.EnableIPv6 {
if !netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[0]) {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter: " +
"IPv4 is enabled but only an IPv6 address was provided")
}
}
if !config.EnableIPv4 && config.EnableIPv6 {
if !netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[0]) {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter: " +
"IPv6 is enabled but only an IPv4 address was provided")
}
}
if len(config.ClusterIPCIDRs) > 1 {
if config.EnableIPv4 && config.EnableIPv6 {
_, secondaryIpnet, err := net.ParseCIDR(strings.TrimSpace(config.ClusterIPCIDRs[1]))
if err != nil {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter: %v", err)
}
npc.serviceClusterIPRanges = append(npc.serviceClusterIPRanges, *secondaryIpnet)
ipv4Provided := netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[0]) ||
netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[1])
ipv6Provided := netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[0]) ||
netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[1])
if !(ipv4Provided && ipv6Provided) {
return nil, fmt.Errorf("failed to parse --service-cluster-ip-range parameter: " +
"dual-stack is enabled, both IPv4 and IPv6 addresses should be provided")
}
} else {
return nil, fmt.Errorf("too many CIDRs provided in --service-cluster-ip-range parameter: " +
"dual-stack must be enabled to provide two addresses")
}
}
if len(config.ClusterIPCIDRs) > 2 {
return nil, fmt.Errorf("too many CIDRs provided in --service-cluster-ip-range parameter, only two " +
"addresses are allowed at once for dual-stack")
}
// Validate and parse NodePort range
if npc.serviceNodePortRange, err = validateNodePortRange(config.NodePortRange); err != nil {
return nil, err
}
// Validate and parse ExternalIP service range
for _, externalIPRange := range config.ExternalIPCIDRs {
_, ipnet, err := net.ParseCIDR(externalIPRange)
if err != nil {
return nil, fmt.Errorf("failed to parse --service-external-ip-range parameter: '%s'. Error: %s",
externalIPRange, err.Error())
}
npc.serviceExternalIPRanges = append(npc.serviceExternalIPRanges, *ipnet)
}
// Validate and parse LoadBalancerIP service range
for _, loadBalancerIPRange := range config.LoadBalancerCIDRs {
_, ipnet, err := net.ParseCIDR(loadBalancerIPRange)
if err != nil {
return nil, fmt.Errorf("failed to parse --loadbalancer-ip-range parameter: '%s'. Error: %s",
loadBalancerIPRange, err.Error())
}
npc.serviceLoadBalancerIPRanges = append(npc.serviceLoadBalancerIPRanges, *ipnet)
}
if config.MetricsEnabled {
// Register the metrics for this controller
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesSyncTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4SaveTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6SaveTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChainsSyncTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV4RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV6RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChains)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsets)
npc.MetricsEnabled = true
}
npc.syncPeriod = config.IPTablesSyncPeriod
node, err := utils.GetNodeObject(clientset, config.HostnameOverride)
if err != nil {
return nil, err
}
npc.nodeHostName = node.Name
nodeIPv4, nodeIPv6 := utils.GetAllNodeIPs(node)
npc.iptablesCmdHandlers = iptablesCmdHandlers
npc.iptablesSaveRestore = make(map[v1core.IPFamily]utils.IPTablesSaveRestorer, 2)
npc.filterTableRules = make(map[v1core.IPFamily]*bytes.Buffer, 2)
npc.ipSetHandlers = ipSetHandlers
npc.nodeIPs = make(map[v1core.IPFamily]net.IP, 2)
if config.EnableIPv4 {
npc.iptablesSaveRestore[v1core.IPv4Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv4Protocol)
var buf bytes.Buffer
npc.filterTableRules[v1core.IPv4Protocol] = &buf
// TODO: assuming that NPC should only use a single IP here is short-sighted, fix it so it considers all IPs
switch {
case len(nodeIPv4[v1core.NodeInternalIP]) > 0:
npc.nodeIPs[v1core.IPv4Protocol] = nodeIPv4[v1core.NodeInternalIP][0]
case len(nodeIPv4[v1core.NodeExternalIP]) > 0:
npc.nodeIPs[v1core.IPv4Protocol] = nodeIPv4[v1core.NodeExternalIP][0]
default:
return nil, fmt.Errorf("IPv4 was enabled but no IPv4 address was found on node")
}
}
if config.EnableIPv6 {
klog.V(2).Infof("IPv6 is enabled")
npc.iptablesSaveRestore[v1core.IPv6Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv6Protocol)
var buf bytes.Buffer
npc.filterTableRules[v1core.IPv6Protocol] = &buf
// TODO: assuming that NPC should only use a single IP here is short-sighted, fix it so it considers all IPs
switch {
case len(nodeIPv6[v1core.NodeInternalIP]) > 0:
npc.nodeIPs[v1core.IPv6Protocol] = nodeIPv6[v1core.NodeInternalIP][0]
case len(nodeIPv6[v1core.NodeExternalIP]) > 0:
npc.nodeIPs[v1core.IPv6Protocol] = nodeIPv6[v1core.NodeExternalIP][0]
default:
return nil, fmt.Errorf("IPv6 was enabled but no IPv6 address was found on node")
}
}
npc.podLister = podInformer.GetIndexer()
npc.PodEventHandler = npc.newPodEventHandler()
npc.nsLister = nsInformer.GetIndexer()
npc.NamespaceEventHandler = npc.newNamespaceEventHandler()
npc.npLister = npInformer.GetIndexer()
npc.NetworkPolicyEventHandler = npc.newNetworkPolicyEventHandler()
return &npc, nil
}
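// Illustrative wiring of the controller (a sketch only; informer construction, config handling, and channel
// lifecycle management are assumptions, not taken from this file):
//
//	iptablesCmdHandlers, ipSetHandlers, err := NewIPTablesHandlers(config)
//	if err != nil {
//		klog.Fatalf("failed to create iptables/ipset handlers: %v", err)
//	}
//	npc, err := NewNetworkPolicyController(clientset, config, podInformer, npInformer, nsInformer,
//		&sync.Mutex{}, iptablesCmdHandlers, ipSetHandlers)
//	if err != nil {
//		klog.Fatalf("failed to create network policy controller: %v", err)
//	}
//	podInformer.AddEventHandler(npc.PodEventHandler)
//	nsInformer.AddEventHandler(npc.NamespaceEventHandler)
//	npInformer.AddEventHandler(npc.NetworkPolicyEventHandler)
//	wg.Add(1)
//	go npc.Run(healthChan, stopCh, &wg)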