kube-router/pkg/controllers/netpol/network_policy_controller.go
Aaron U'Ren ecaad2c6e4 fix(cleanup): add missing handlers for cleanup
kube-router v2.X introduced the idea of iptables and ipset handlers that
allow kube-router to be dual-stack capable. However, the cleanup logic
for the various controllers was not properly ported when this happened.
When the cleanup functions run, their controllers often have not been
fully initialized, since cleanup should not be dependent on kube-router
being able to reach a kube-apiserver.

As a result, the cleanup functions were missing these handlers and
either silently became no-ops or, worse, ran into nil pointer failures.

This corrects that, so that kube-router no longer fails this way and
cleans up as it did in v1.X.
2024-04-26 14:16:09 -05:00


package netpol
import (
"bytes"
"crypto/sha256"
"encoding/base32"
"fmt"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck"
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
"github.com/cloudnativelabs/kube-router/v2/pkg/options"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
"github.com/coreos/go-iptables/iptables"
"k8s.io/klog/v2"
v1core "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
netutils "k8s.io/utils/net"
)
const (
kubePodFirewallChainPrefix = "KUBE-POD-FW-"
kubeNetworkPolicyChainPrefix = "KUBE-NWPLCY-"
kubeSourceIPSetPrefix = "KUBE-SRC-"
kubeDestinationIPSetPrefix = "KUBE-DST-"
kubeInputChainName = "KUBE-ROUTER-INPUT"
kubeForwardChainName = "KUBE-ROUTER-FORWARD"
kubeOutputChainName = "KUBE-ROUTER-OUTPUT"
kubeDefaultNetpolChain = "KUBE-NWPLCY-DEFAULT"
kubeIngressPolicyType = "ingress"
kubeEgressPolicyType = "egress"
kubeBothPolicyType = "both"
syncVersionBase = 10
)
var (
defaultChains = map[string]string{
"INPUT": kubeInputChainName,
"FORWARD": kubeForwardChainName,
"OUTPUT": kubeOutputChainName,
}
)
// Network policy controller provides both ingress and egress filtering for the pods as per the defined network
// policies. Two different types of iptables chains are used. Each pod running on the node that requires either
// ingress or egress filtering gets a pod-specific chain. Each network policy has an iptables chain, whose rules
// are expressed through ipsets matching source and destination pod IPs. In the FORWARD chain of the filter
// table, a rule is added to jump traffic originating from the pod (in the case of an egress network policy) or
// destined to the pod (in the case of an ingress network policy) to the pod-specific iptables chain. Each
// pod-specific iptables chain has rules to jump to the network policy chains that the pod matches. So a packet
// originating from or destined to a pod goes through the filter table's FORWARD chain, followed by the
// pod-specific chain, followed by one or more network policy chains, until a match accepts the packet, or the
// packet is dropped by the rule in the pod chain if there is no match.
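//
// As an illustration only (the chain-name hashes, the pod IP, and the exact matchers below are invented; the real
// rules are generated from policy hashes and ipsets by the sync functions further down), the resulting filter
// table is shaped roughly like this:
//
//    -A FORWARD -m comment --comment "kube-router netpol - <hash>" -j KUBE-ROUTER-FORWARD
//    -A KUBE-ROUTER-FORWARD -d 10.244.1.23/32 -j KUBE-POD-FW-<pod hash>
//    -A KUBE-POD-FW-<pod hash> -j KUBE-NWPLCY-<policy hash>      (one jump per policy the pod matches)
//    -A KUBE-POD-FW-<pod hash> <drop rule, hit only if no policy chain accepted the packet>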
// NetworkPolicyController holds the state required by the network policy controller
type NetworkPolicyController struct {
nodeHostName string
serviceClusterIPRanges []net.IPNet
serviceExternalIPRanges []net.IPNet
serviceLoadBalancerIPRanges []net.IPNet
serviceNodePortRange string
mu sync.Mutex
syncPeriod time.Duration
MetricsEnabled bool
healthChan chan<- *healthcheck.ControllerHeartbeat
fullSyncRequestChan chan struct{}
ipsetMutex *sync.Mutex
iptablesCmdHandlers map[v1core.IPFamily]utils.IPTablesHandler
iptablesSaveRestore map[v1core.IPFamily]utils.IPTablesSaveRestorer
filterTableRules map[v1core.IPFamily]*bytes.Buffer
ipSetHandlers map[v1core.IPFamily]utils.IPSetHandler
nodeIPs map[v1core.IPFamily]net.IP
podLister cache.Indexer
npLister cache.Indexer
nsLister cache.Indexer
PodEventHandler cache.ResourceEventHandler
NamespaceEventHandler cache.ResourceEventHandler
NetworkPolicyEventHandler cache.ResourceEventHandler
}
// internal structure to represent a network policy
type networkPolicyInfo struct {
name string
namespace string
podSelector labels.Selector
// set of pods matching the network policy spec's podSelector label selector
targetPods map[string]podInfo
// whitelist ingress rules from the network policy spec
ingressRules []ingressRule
// whitelist egress rules from the network policy spec
egressRules []egressRule
// policy type "ingress" or "egress" or "both" as defined by PolicyType in the spec
policyType string
}
// internal structure to represent Pod
type podInfo struct {
ip string
ips []v1core.PodIP
name string
namespace string
labels map[string]string
}
// internal structure to represent NetworkPolicyIngressRule in the spec
type ingressRule struct {
matchAllPorts bool
ports []protocolAndPort
namedPorts []endPoints
matchAllSource bool
srcPods []podInfo
srcIPBlocks map[v1core.IPFamily][][]string
}
// internal structure to represent NetworkPolicyEgressRule in the spec
type egressRule struct {
matchAllPorts bool
ports []protocolAndPort
namedPorts []endPoints
matchAllDestinations bool
dstPods []podInfo
dstIPBlocks map[v1core.IPFamily][][]string
}
type protocolAndPort struct {
protocol string
port string
endport string
}
type endPoints struct {
ips map[v1core.IPFamily][]string
protocolAndPort
}
type numericPort2eps map[string]*endPoints
type protocol2eps map[string]numericPort2eps
type namedPort2eps map[string]protocol2eps
// Run runs forever until we receive a notification on stopCh
func (npc *NetworkPolicyController) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{},
wg *sync.WaitGroup) {
t := time.NewTicker(npc.syncPeriod)
defer t.Stop()
defer wg.Done()
klog.Info("Starting network policy controller")
npc.healthChan = healthChan
// setup kube-router specific top level custom chains (KUBE-ROUTER-INPUT, KUBE-ROUTER-FORWARD, KUBE-ROUTER-OUTPUT)
npc.ensureTopLevelChains()
// setup default network policy chain that is applied to traffic from/to the pods that does not match any network
// policy
npc.ensureDefaultNetworkPolicyChain()
// Full syncs of the network policy controller take a lot of time and can only be processed one at a time,
// therefore, we start it in its own goroutine and request syncs through a single-item channel
klog.Info("Starting network policy controller full sync goroutine")
wg.Add(1)
go func(fullSyncRequest <-chan struct{}, stopCh <-chan struct{}, wg *sync.WaitGroup) {
defer wg.Done()
for {
// Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first
select {
case <-stopCh:
klog.Info("Shutting down network policies full sync goroutine")
return
default:
}
select {
case <-stopCh:
klog.Info("Shutting down network policies full sync goroutine")
return
case <-fullSyncRequest:
klog.V(3).Info("Received request for a full sync, processing")
npc.fullPolicySync() // fullPolicySync() is a blocking request here
}
}
}(npc.fullSyncRequestChan, stopCh, wg)
// loop forever until notified to stop on stopCh
for {
klog.V(1).Info("Requesting periodic sync of iptables to reflect network policies")
npc.RequestFullSync()
select {
case <-stopCh:
klog.Infof("Shutting down network policies controller")
return
case <-t.C:
}
}
}
// RequestFullSync allows requesting a full network policy sync without blocking the caller
func (npc *NetworkPolicyController) RequestFullSync() {
select {
case npc.fullSyncRequestChan <- struct{}{}:
klog.V(3).Info("Full sync request queue was empty so a full sync request was successfully sent")
default: // Don't block if the buffered channel is full, return quickly so that we don't block caller execution
klog.V(1).Info("Full sync request queue was full, skipping...")
}
}
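// The interplay between Run and RequestFullSync above is a coalescing work queue built from a single-item
// buffered channel: requests never block, at most one request is ever queued, and shutdown always wins over new
// work. A minimal, self-contained sketch of the pattern (stopCh and doFullSync are placeholders, not identifiers
// from this file):
//
//    requests := make(chan struct{}, 1)
//
//    // requester side: never blocks; the request is dropped if one is already queued
//    select {
//    case requests <- struct{}{}:
//    default:
//    }
//
//    // worker side: check stopCh first so that shutdown is handled before picking up more work
//    for {
//        select {
//        case <-stopCh:
//            return
//        default:
//        }
//        select {
//        case <-stopCh:
//            return
//        case <-requests:
//            doFullSync()
//        }
//    }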
// fullPolicySync synchronizes iptables to the desired state of network policies
func (npc *NetworkPolicyController) fullPolicySync() {
var err error
var networkPoliciesInfo []networkPolicyInfo
npc.mu.Lock()
defer npc.mu.Unlock()
healthcheck.SendHeartBeat(npc.healthChan, "NPC")
start := time.Now()
syncVersion := strconv.FormatInt(start.UnixNano(), syncVersionBase)
defer func() {
endTime := time.Since(start)
if npc.MetricsEnabled {
metrics.ControllerIptablesSyncTime.Observe(endTime.Seconds())
}
klog.V(1).Infof("sync iptables took %v", endTime)
}()
klog.V(1).Infof("Starting sync of iptables with version: %s", syncVersion)
// ensure kube-router specific top level chains and corresponding rules exist
npc.ensureTopLevelChains()
// ensure default network policy chain that is applied to traffic from/to the pods that does not match any network
// policy
npc.ensureDefaultNetworkPolicyChain()
networkPoliciesInfo, err = npc.buildNetworkPoliciesInfo()
if err != nil {
klog.Errorf("Aborting sync. Failed to build network policies: %v", err.Error())
return
}
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
npc.filterTableRules[ipFamily].Reset()
saveStart := time.Now()
err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily])
saveEndTime := time.Since(saveStart)
if npc.MetricsEnabled {
//nolint:exhaustive // we don't need exhaustive searching for IP Families
switch ipFamily {
case v1core.IPv4Protocol:
metrics.ControllerIptablesV4SaveTime.Observe(saveEndTime.Seconds())
case v1core.IPv6Protocol:
metrics.ControllerIptablesV6SaveTime.Observe(saveEndTime.Seconds())
}
}
klog.V(1).Infof("Saving %v iptables rules took %v", ipFamily, saveEndTime)
if err != nil {
klog.Errorf("Aborting sync. Failed to run iptables-save: %v", err.Error())
return
}
}
activePolicyChains, activePolicyIPSets, err := npc.syncNetworkPolicyChains(networkPoliciesInfo, syncVersion)
if err != nil {
klog.Errorf("Aborting sync. Failed to sync network policy chains: %v", err.Error())
return
}
activePodFwChains := npc.syncPodFirewallChains(networkPoliciesInfo, syncVersion)
// Makes sure that the ACCEPT rules for packets marked with "0x20000" are added to the end of each of kube-router's
// top level chains
npc.ensureExplicitAccept()
err = npc.cleanupStaleRules(activePolicyChains, activePodFwChains, false)
if err != nil {
klog.Errorf("Aborting sync. Failed to cleanup stale iptables rules: %v", err.Error())
return
}
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
ipFamily := ipFamily
restoreStart := time.Now()
err := iptablesSaveRestore.Restore("filter", npc.filterTableRules[ipFamily].Bytes())
restoreEndTime := time.Since(restoreStart)
if npc.MetricsEnabled {
//nolint:exhaustive // we don't need exhaustive searching for IP Families
switch ipFamily {
case v1core.IPv4Protocol:
metrics.ControllerIptablesV4RestoreTime.Observe(restoreEndTime.Seconds())
case v1core.IPv6Protocol:
metrics.ControllerIptablesV6RestoreTime.Observe(restoreEndTime.Seconds())
}
}
klog.V(1).Infof("Restoring %v iptables rules took %v", ipFamily, restoreEndTime)
if err != nil {
klog.Errorf("Aborting sync. Failed to run iptables-restore: %v\n%s",
err.Error(), npc.filterTableRules[ipFamily].String())
return
}
}
err = npc.cleanupStaleIPSets(activePolicyIPSets)
if err != nil {
klog.Errorf("Failed to cleanup stale ipsets: %v", err.Error())
return
}
}
func (npc *NetworkPolicyController) iptablesCmdHandlerForCIDR(cidr *net.IPNet) (utils.IPTablesHandler, error) {
if netutils.IsIPv4CIDR(cidr) {
return npc.iptablesCmdHandlers[v1core.IPv4Protocol], nil
}
if netutils.IsIPv6CIDR(cidr) {
return npc.iptablesCmdHandlers[v1core.IPv6Protocol], nil
}
return nil, fmt.Errorf("invalid CIDR")
}
func (npc *NetworkPolicyController) allowTrafficToClusterIPRange(
serviceVIPPosition int,
serviceClusterIPRange *net.IPNet,
addUUIDForRuleSpec func(chain string, ruleSpec *[]string) (string, error),
ensureRuleAtPosition func(iptablesCmdHandler utils.IPTablesHandler,
chain string, ruleSpec []string, uuid string, position int),
comment string) {
whitelistServiceVips := []string{"-m", "comment", "--comment", comment,
"-d", serviceClusterIPRange.String(), "-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
iptablesCmdHandler, err := npc.iptablesCmdHandlerForCIDR(serviceClusterIPRange)
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
ensureRuleAtPosition(iptablesCmdHandler,
kubeInputChainName, whitelistServiceVips, uuid, serviceVIPPosition)
}
// Creates custom chains KUBE-ROUTER-INPUT, KUBE-ROUTER-FORWARD, KUBE-ROUTER-OUTPUT
// and the following rules in the filter table to jump from the builtin chains to the custom chains
// -A INPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-INPUT
// -A FORWARD -m comment --comment "kube-router netpol" -j KUBE-ROUTER-FORWARD
// -A OUTPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-OUTPUT
func (npc *NetworkPolicyController) ensureTopLevelChains() {
const serviceVIPPosition = 1
rulePosition := map[v1core.IPFamily]int{v1core.IPv4Protocol: 1, v1core.IPv6Protocol: 1}
addUUIDForRuleSpec := func(chain string, ruleSpec *[]string) (string, error) {
hash := sha256.Sum256([]byte(chain + strings.Join(*ruleSpec, "")))
encoded := base32.StdEncoding.EncodeToString(hash[:])[:16]
for idx, part := range *ruleSpec {
if part == "--comment" {
(*ruleSpec)[idx+1] = (*ruleSpec)[idx+1] + " - " + encoded
return encoded, nil
}
}
return "", fmt.Errorf("could not find a comment in the ruleSpec string given: %s",
strings.Join(*ruleSpec, " "))
}
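// For example (the identifier is shown symbolically; it is simply the first 16 base32 characters of the SHA-256
// of chain+ruleSpec):
//
//    chain := "FORWARD"
//    ruleSpec := []string{"-m", "comment", "--comment", "kube-router netpol", "-j", kubeForwardChainName}
//    hash := sha256.Sum256([]byte(chain + strings.Join(ruleSpec, "")))
//    id := base32.StdEncoding.EncodeToString(hash[:])[:16]
//    // the rule's comment becomes "kube-router netpol - <id>", which is what ensureRuleAtPosition below
//    // searches for when deciding whether the rule already exists and whether it is at the right position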
ensureRuleAtPosition := func(
iptablesCmdHandler utils.IPTablesHandler, chain string, ruleSpec []string, uuid string, position int) {
exists, err := iptablesCmdHandler.Exists("filter", chain, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to verify rule exists in %s chain due to %s", chain, err.Error())
}
if !exists {
klog.V(2).Infof("Rule '%s' doesn't exist in chain %s, inserting at position %d",
strings.Join(ruleSpec, " "), chain, position)
err := iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error())
}
return
}
rules, err := iptablesCmdHandler.List("filter", chain)
if err != nil {
klog.Fatalf("failed to list rules in filter table %s chain due to %s", chain, err.Error())
}
var ruleNo, ruleIndexOffset int
for i, rule := range rules {
rule = strings.Replace(rule, "\"", "", 2) // removes quote from comment string
if strings.HasPrefix(rule, "-P") || strings.HasPrefix(rule, "-N") {
// if this chain has a default policy, then it will show as rule #1 from iptablesCmdHandler.List so we
// need to account for this offset
ruleIndexOffset++
continue
}
if strings.Contains(rule, uuid) {
// range uses a 0 index, but iptables uses a 1 index so we need to increase ruleNo by 1
ruleNo = i + 1 - ruleIndexOffset
break
}
}
if ruleNo != position {
klog.V(2).Infof("Rule '%s' existed in chain %s, but was in position %d instead of %d, "+
"moving...", strings.Join(ruleSpec, " "), chain, ruleNo, position)
err = iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...)
if err != nil {
klog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error())
}
err = iptablesCmdHandler.Delete("filter", chain, strconv.Itoa(ruleNo+1))
if err != nil {
klog.Fatalf("Failed to delete incorrect rule in %s chain due to %s", chain, err.Error())
}
}
}
for _, handler := range npc.iptablesCmdHandlers {
for builtinChain, customChain := range defaultChains {
exists, err := handler.ChainExists("filter", customChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s", customChain,
err.Error())
}
if !exists {
klog.V(2).Infof("Custom chain was missing, creating: %s in filter table", customChain)
err = handler.NewChain("filter", customChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s", customChain,
err.Error())
}
}
args := []string{"-m", "comment", "--comment", "kube-router netpol", "-j", customChain}
uuid, err := addUUIDForRuleSpec(builtinChain, &args)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Ensuring jump to chain %s from %s at position %d", customChain, builtinChain,
serviceVIPPosition)
ensureRuleAtPosition(handler, builtinChain, args, uuid, serviceVIPPosition)
}
}
if len(npc.serviceClusterIPRanges) > 0 {
for idx, serviceRange := range npc.serviceClusterIPRanges {
var family v1core.IPFamily
if serviceRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
klog.V(2).Infof("Allow traffic to ingress towards Cluster IP Range: %s for family: %s",
serviceRange.String(), family)
npc.allowTrafficToClusterIPRange(rulePosition[family], &npc.serviceClusterIPRanges[idx],
addUUIDForRuleSpec, ensureRuleAtPosition, "allow traffic to primary/secondary cluster IP range")
rulePosition[family]++
}
} else {
klog.Fatalf("Primary service cluster IP range is not configured")
}
for family, handler := range npc.iptablesCmdHandlers {
whitelistTCPNodeports := []string{"-p", "tcp", "-m", "comment", "--comment",
"allow LOCAL TCP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL",
"-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistTCPNodeports)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Allow TCP traffic to ingress towards node port range: %s for family: %s",
npc.serviceNodePortRange, family)
ensureRuleAtPosition(handler,
kubeInputChainName, whitelistTCPNodeports, uuid, rulePosition[family])
rulePosition[family]++
whitelistUDPNodeports := []string{"-p", "udp", "-m", "comment", "--comment",
"allow LOCAL UDP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL",
"-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"}
uuid, err = addUUIDForRuleSpec(kubeInputChainName, &whitelistUDPNodeports)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
klog.V(2).Infof("Allow UDP traffic to ingress towards node port range: %s for family: %s",
npc.serviceNodePortRange, family)
ensureRuleAtPosition(handler,
kubeInputChainName, whitelistUDPNodeports, uuid, rulePosition[family])
rulePosition[family]++
}
for idx, externalIPRange := range npc.serviceExternalIPRanges {
var family v1core.IPFamily
if externalIPRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
whitelistServiceVips := []string{"-m", "comment", "--comment",
"allow traffic to external IP range: " + externalIPRange.String(), "-d", externalIPRange.String(),
"-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
// Access externalIPRange via index to avoid implicit memory aliasing
cidrHandler, err := npc.iptablesCmdHandlerForCIDR(&npc.serviceExternalIPRanges[idx])
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
klog.V(2).Infof("Allow traffic to ingress towards External IP Range: %s for family: %s",
externalIPRange.String(), family)
ensureRuleAtPosition(cidrHandler,
kubeInputChainName, whitelistServiceVips, uuid, rulePosition[family])
rulePosition[family]++
}
for idx, loadBalancerIPRange := range npc.serviceLoadBalancerIPRanges {
var family v1core.IPFamily
if loadBalancerIPRange.IP.To4() != nil {
family = v1core.IPv4Protocol
} else {
family = v1core.IPv6Protocol
}
whitelistServiceVips := []string{"-m", "comment", "--comment",
"allow traffic to load balancer IP range: " + loadBalancerIPRange.String(), "-d", loadBalancerIPRange.String(),
"-j", "RETURN"}
uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips)
if err != nil {
klog.Fatalf("Failed to get uuid for rule: %s", err.Error())
}
// Access loadBalancerIPRange via index to avoid implicit memory aliasing
cidrHandler, err := npc.iptablesCmdHandlerForCIDR(&npc.serviceLoadBalancerIPRanges[idx])
if err != nil {
klog.Fatalf("Failed to get iptables handler: %s", err.Error())
}
klog.V(2).Infof("Allow traffic to ingress towards Load Balancer IP Range: %s for family: %s",
loadBalancerIPRange.String(), family)
ensureRuleAtPosition(cidrHandler,
kubeInputChainName, whitelistServiceVips, uuid, rulePosition[family])
rulePosition[family]++
}
}
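// Taken together, the rules ensured above sit at the top of KUBE-ROUTER-INPUT in a fixed order per IP family:
// cluster IP range(s) first, then the TCP and UDP node port ranges, then any external IP ranges, then any load
// balancer IP ranges. Illustratively (the addresses and the node port range below are example values only, and
// the comment matches are elided):
//
//    -A KUBE-ROUTER-INPUT -d 10.96.0.0/12 ... -j RETURN
//    -A KUBE-ROUTER-INPUT -p tcp -m addrtype --dst-type LOCAL -m multiport --dports 30000:32767 ... -j RETURN
//    -A KUBE-ROUTER-INPUT -p udp -m addrtype --dst-type LOCAL -m multiport --dports 30000:32767 ... -j RETURN
//    -A KUBE-ROUTER-INPUT -d 192.0.2.0/24 ... -j RETURN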
func (npc *NetworkPolicyController) ensureExplicitAccept() {
// For traffic to/from the local pods, let the network policy controller be the
// authoritative entity that ACCEPTs the traffic if it complies with network policies
for _, filterTableRules := range npc.filterTableRules {
for _, chain := range defaultChains {
comment := "\"rule to explicitly ACCEPT traffic that comply to network policies\""
args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "--mark", "0x20000/0x20000",
"-j", "ACCEPT"}
utils.AppendUnique(filterTableRules, chain, args)
}
}
}
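// For each IP family this appends, roughly, the following line to each of the three custom chains in the pending
// iptables-restore buffer (shown here for KUBE-ROUTER-FORWARD):
//
//    -A KUBE-ROUTER-FORWARD -m comment --comment "rule to explicitly ACCEPT traffic that comply to network policies" -m mark --mark 0x20000/0x20000 -j ACCEPT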
// Creates the custom chain KUBE-NWPLCY-DEFAULT
func (npc *NetworkPolicyController) ensureDefaultNetworkPolicyChain() {
for _, iptablesCmdHandler := range npc.iptablesCmdHandlers {
markArgs := make([]string, 0)
markComment := "rule to mark traffic matching a network policy"
markArgs = append(markArgs, "-j", "MARK", "-m", "comment", "--comment", markComment,
"--set-xmark", "0x10000/0x10000")
exists, err := iptablesCmdHandler.ChainExists("filter", kubeDefaultNetpolChain)
if err != nil {
klog.Fatalf("failed to check for the existence of chain %s, error: %v", kubeDefaultNetpolChain, err)
}
if !exists {
err = iptablesCmdHandler.NewChain("filter", kubeDefaultNetpolChain)
if err != nil {
klog.Fatalf("failed to run iptables command to create %s chain due to %s",
kubeDefaultNetpolChain, err.Error())
}
}
err = iptablesCmdHandler.AppendUnique("filter", kubeDefaultNetpolChain, markArgs...)
if err != nil {
klog.Fatalf("Failed to run iptables command: %s", err.Error())
}
}
}
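// Per IP family, the rule appended above is roughly equivalent to running:
//
//    iptables -t filter -A KUBE-NWPLCY-DEFAULT -j MARK -m comment --comment "rule to mark traffic matching a network policy" --set-xmark 0x10000/0x10000
//
// (ip6tables for the IPv6 handler).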
func (npc *NetworkPolicyController) cleanupStaleRules(activePolicyChains, activePodFwChains map[string]bool,
deleteDefaultChains bool) error {
cleanupPodFwChains := make([]string, 0)
cleanupPolicyChains := make([]string, 0)
for ipFamily, iptablesCmdHandler := range npc.iptablesCmdHandlers {
// find iptables chains and ipsets that are no longer used by comparing current to the active maps we were passed
chains, err := iptablesCmdHandler.ListChains("filter")
if err != nil {
return fmt.Errorf("unable to list chains: %w", err)
}
for _, chain := range chains {
if strings.HasPrefix(chain, kubeNetworkPolicyChainPrefix) {
if chain == kubeDefaultNetpolChain {
continue
}
if _, ok := activePolicyChains[chain]; !ok {
cleanupPolicyChains = append(cleanupPolicyChains, chain)
continue
}
}
if strings.HasPrefix(chain, kubePodFirewallChainPrefix) {
if _, ok := activePodFwChains[chain]; !ok {
cleanupPodFwChains = append(cleanupPodFwChains, chain)
continue
}
}
}
var newChains, newRules, desiredFilterTable bytes.Buffer
rules := strings.Split(npc.filterTableRules[ipFamily].String(), "\n")
if len(rules) > 0 && rules[len(rules)-1] == "" {
rules = rules[:len(rules)-1]
}
for _, rule := range rules {
skipRule := false
for _, podFWChainName := range cleanupPodFwChains {
if strings.Contains(rule, podFWChainName) {
skipRule = true
break
}
}
for _, policyChainName := range cleanupPolicyChains {
if strings.Contains(rule, policyChainName) {
skipRule = true
break
}
}
if deleteDefaultChains {
for _, chain := range []string{kubeInputChainName, kubeForwardChainName, kubeOutputChainName,
kubeDefaultNetpolChain} {
if strings.Contains(rule, chain) {
skipRule = true
break
}
}
}
if strings.Contains(rule, "COMMIT") || strings.HasPrefix(rule, "# ") {
skipRule = true
}
if skipRule {
continue
}
if strings.HasPrefix(rule, ":") {
newChains.WriteString(rule + " - [0:0]\n")
}
if strings.HasPrefix(rule, "-") {
newRules.WriteString(rule + "\n")
}
}
desiredFilterTable.WriteString("*filter" + "\n")
desiredFilterTable.Write(newChains.Bytes())
desiredFilterTable.Write(newRules.Bytes())
desiredFilterTable.WriteString("COMMIT" + "\n")
npc.filterTableRules[ipFamily] = &desiredFilterTable
}
return nil
}
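// Illustratively (the chain hashes and the pod IP are invented), if KUBE-NWPLCY-AAAA is no longer backed by any
// policy while KUBE-NWPLCY-CCCC still is, a saved rule set along the lines of
//
//    -A KUBE-ROUTER-FORWARD -d 10.244.1.23/32 -j KUBE-POD-FW-BBBB
//    -A KUBE-POD-FW-BBBB -j KUBE-NWPLCY-AAAA
//    -A KUBE-POD-FW-BBBB -j KUBE-NWPLCY-CCCC
//
// is rewritten into a restore payload along the lines of
//
//    *filter
//    -A KUBE-ROUTER-FORWARD -d 10.244.1.23/32 -j KUBE-POD-FW-BBBB
//    -A KUBE-POD-FW-BBBB -j KUBE-NWPLCY-CCCC
//    COMMIT
//
// every line that mentions a stale chain is dropped, which removes both the stale chain's own rules and the
// jumps into it.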
func (npc *NetworkPolicyController) cleanupStaleIPSets(activePolicyIPSets map[string]bool) error {
// Certain actions, like Cleanup(), don't work with full instantiations of the controller. In those cases the
// mutex may not be present, and it doesn't need to be, because they operate out of a single goroutine where
// there is no need for locking
if nil != npc.ipsetMutex {
klog.V(1).Infof("Attempting to attain ipset mutex lock")
npc.ipsetMutex.Lock()
klog.V(1).Infof("Attained ipset mutex lock, continuing...")
defer func() {
npc.ipsetMutex.Unlock()
klog.V(1).Infof("Returned ipset mutex lock")
}()
}
for ipFamily, ipsets := range npc.ipSetHandlers {
cleanupPolicyIPSets := make([]*utils.Set, 0)
if err := ipsets.Save(); err != nil {
klog.Fatalf("failed to initialize ipsets command executor due to %s", err.Error())
}
if ipFamily == v1core.IPv6Protocol {
for _, set := range ipsets.Sets() {
if strings.HasPrefix(set.Name, fmt.Sprintf("%s:%s", utils.FamillyInet6, kubeSourceIPSetPrefix)) ||
strings.HasPrefix(set.Name, fmt.Sprintf("%s:%s", utils.FamillyInet6, kubeDestinationIPSetPrefix)) {
if _, ok := activePolicyIPSets[set.Name]; !ok {
cleanupPolicyIPSets = append(cleanupPolicyIPSets, set)
}
}
}
} else {
for _, set := range ipsets.Sets() {
if strings.HasPrefix(set.Name, kubeSourceIPSetPrefix) ||
strings.HasPrefix(set.Name, kubeDestinationIPSetPrefix) {
if _, ok := activePolicyIPSets[set.Name]; !ok {
cleanupPolicyIPSets = append(cleanupPolicyIPSets, set)
}
}
}
}
// cleanup network policy ipsets
for _, set := range cleanupPolicyIPSets {
if err := set.Destroy(); err != nil {
return fmt.Errorf("failed to delete ipset %s due to %s", set.Name, err)
}
}
}
return nil
}
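// Set names follow the prefixes defined at the top of this file; for the IPv6 handler the name additionally
// carries an address-family prefix (assuming utils.FamillyInet6 renders as "inet6"). Illustratively:
//
//    KUBE-SRC-<hash>          IPv4 source set; destroyed if it is not in activePolicyIPSets
//    inet6:KUBE-DST-<hash>    IPv6 destination set; destroyed if it is not in activePolicyIPSets
//    <any other set>          left alone, since it does not carry one of the netpol prefixes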
// Cleanup cleans up all of the iptables and ipset configurations created by the network policy controller
func (npc *NetworkPolicyController) Cleanup() {
klog.Info("Cleaning up NetworkPolicyController configurations...")
if len(npc.iptablesCmdHandlers) < 1 {
iptablesCmdHandlers, ipSetHandlers, err := NewIPTablesHandlers(nil)
if err != nil {
klog.Errorf("unable to get iptables and ipset handlers: %v", err)
return
}
npc.iptablesCmdHandlers = iptablesCmdHandlers
npc.ipSetHandlers = ipSetHandlers
// Make other structures that we rely on
npc.iptablesSaveRestore = make(map[v1core.IPFamily]utils.IPTablesSaveRestorer, 2)
npc.iptablesSaveRestore[v1core.IPv4Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv4Protocol)
npc.iptablesSaveRestore[v1core.IPv6Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv6Protocol)
npc.filterTableRules = make(map[v1core.IPFamily]*bytes.Buffer, 2)
var buf bytes.Buffer
npc.filterTableRules[v1core.IPv4Protocol] = &buf
var buf2 bytes.Buffer
npc.filterTableRules[v1core.IPv6Protocol] = &buf2
}
var emptySet map[string]bool
// Take a dump (iptables-save) of the current filter table for cleanupStaleRules() to work on
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
if err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily]); err != nil {
klog.Errorf("error encountered attempting to list iptables rules for cleanup: %v", err)
return
}
}
// Run cleanupStaleRules() to get rid of most of the kube-router rules (this is the same logic that runs as
// part of NPC's runtime loop). Setting the last parameter to true causes even the default chains to be removed.
err := npc.cleanupStaleRules(emptySet, emptySet, true)
if err != nil {
klog.Errorf("error encountered attempting to cleanup iptables rules: %v", err)
return
}
// Restore (iptables-restore) npc's cleaned up version of the iptables filter chain
for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore {
if err = iptablesSaveRestore.Restore("filter", npc.filterTableRules[ipFamily].Bytes()); err != nil {
klog.Errorf(
"error encountered while loading running iptables-restore: %v\n%s", err,
npc.filterTableRules[ipFamily].String())
}
}
// Cleanup ipsets
err = npc.cleanupStaleIPSets(emptySet)
if err != nil {
klog.Errorf("error encountered while cleaning ipsets: %v", err)
return
}
klog.Infof("Successfully cleaned the NetworkPolicyController configurations done by kube-router")
}
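// Because of the bootstrapping above, cleanup can run against a bare controller without ever talking to a
// kube-apiserver. Roughly:
//
//    npc := &NetworkPolicyController{}
//    npc.Cleanup() // builds its own iptables/ipset handlers because iptablesCmdHandlers is empty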
func NewIPTablesHandlers(config *options.KubeRouterConfig) (
map[v1core.IPFamily]utils.IPTablesHandler, map[v1core.IPFamily]utils.IPSetHandler, error) {
iptablesCmdHandlers := make(map[v1core.IPFamily]utils.IPTablesHandler, 2)
ipSetHandlers := make(map[v1core.IPFamily]utils.IPSetHandler, 2)
if config == nil || config.EnableIPv4 {
iptHandler, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
if err != nil {
return nil, nil, fmt.Errorf("failed to create iptables handler: %w", err)
}
iptablesCmdHandlers[v1core.IPv4Protocol] = iptHandler
ipset, err := utils.NewIPSet(false)
if err != nil {
return nil, nil, fmt.Errorf("failed to create ipset handler: %w", err)
}
ipSetHandlers[v1core.IPv4Protocol] = ipset
}
if config == nil || config.EnableIPv6 {
iptHandler, err := iptables.NewWithProtocol(iptables.ProtocolIPv6)
if err != nil {
return nil, nil, fmt.Errorf("failed to create iptables handler: %w", err)
}
iptablesCmdHandlers[v1core.IPv6Protocol] = iptHandler
ipset, err := utils.NewIPSet(true)
if err != nil {
return nil, nil, fmt.Errorf("failed to create ipset handler: %w", err)
}
ipSetHandlers[v1core.IPv6Protocol] = ipset
}
return iptablesCmdHandlers, ipSetHandlers, nil
}
// NewNetworkPolicyController returns a new NetworkPolicyController object
func NewNetworkPolicyController(clientset kubernetes.Interface,
config *options.KubeRouterConfig, podInformer cache.SharedIndexInformer,
npInformer cache.SharedIndexInformer, nsInformer cache.SharedIndexInformer,
ipsetMutex *sync.Mutex,
iptablesCmdHandlers map[v1core.IPFamily]utils.IPTablesHandler,
ipSetHandlers map[v1core.IPFamily]utils.IPSetHandler) (*NetworkPolicyController, error) {
npc := NetworkPolicyController{ipsetMutex: ipsetMutex}
// Create a single-item buffered channel to ensure that we only keep a single full sync request at a time.
// Additional requests would be pointless to queue, since after the first one is processed the system would
// already be up to date with all of the policy changes from any request enqueued after it
npc.fullSyncRequestChan = make(chan struct{}, 1)
// Validate and parse ClusterIP service range
if len(config.ClusterIPCIDRs) == 0 {
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter, the list is empty")
}
_, primaryIpnet, err := net.ParseCIDR(strings.TrimSpace(config.ClusterIPCIDRs[0]))
if err != nil {
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: %w", err)
}
npc.serviceClusterIPRanges = append(npc.serviceClusterIPRanges, *primaryIpnet)
// Validate that ClusterIP service range type matches the configuration
if config.EnableIPv4 && !config.EnableIPv6 {
if !netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[0]) {
//nolint:goconst // we don't care about abstracting an error message
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: " +
"IPv4 is enabled but only IPv6 address is provided")
}
}
if !config.EnableIPv4 && config.EnableIPv6 {
if !netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[0]) {
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: " +
"IPv6 is enabled but only IPv4 address is provided")
}
}
if len(config.ClusterIPCIDRs) > 1 {
if config.EnableIPv4 && config.EnableIPv6 {
_, secondaryIpnet, err := net.ParseCIDR(strings.TrimSpace(config.ClusterIPCIDRs[1]))
if err != nil {
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: %v", err)
}
npc.serviceClusterIPRanges = append(npc.serviceClusterIPRanges, *secondaryIpnet)
ipv4Provided := netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[0]) ||
netutils.IsIPv4CIDR(&npc.serviceClusterIPRanges[1])
ipv6Provided := netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[0]) ||
netutils.IsIPv6CIDR(&npc.serviceClusterIPRanges[1])
if !(ipv4Provided && ipv6Provided) {
return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: " +
"dual-stack is enabled, both IPv4 and IPv6 addresses should be provided")
}
} else {
return nil, fmt.Errorf("too many CIDRs provided in --service-cluster-ip-range parameter: " +
"dual-stack must be enabled to provide two addresses")
}
}
if len(config.ClusterIPCIDRs) > 2 {
return nil, fmt.Errorf("too many CIDRs provided in --service-cluster-ip-range parameter, only two " +
"addresses are allowed at once for dual-stack")
}
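// As an example of what passes this validation (the CIDR values here are illustrative only):
//
//    --service-cluster-ip-range=10.96.0.0/12                      single stack, with EnableIPv4 set
//    --service-cluster-ip-range=fd00:10:96::/112                  single stack, with EnableIPv6 set
//    --service-cluster-ip-range=10.96.0.0/12,fd00:10:96::/112     dual stack, with both families enabled
//
// More than two CIDRs, or two CIDRs of the same family, is rejected above.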
// Validate and parse NodePort range
if npc.serviceNodePortRange, err = validateNodePortRange(config.NodePortRange); err != nil {
return nil, err
}
// Validate and parse ExternalIP service range
for _, externalIPRange := range config.ExternalIPCIDRs {
_, ipnet, err := net.ParseCIDR(externalIPRange)
if err != nil {
return nil, fmt.Errorf("failed to get parse --service-external-ip-range parameter: '%s'. Error: %s",
externalIPRange, err.Error())
}
npc.serviceExternalIPRanges = append(npc.serviceExternalIPRanges, *ipnet)
}
// Validate and parse LoadBalancerIP service range
for _, loadBalancerIPRange := range config.LoadBalancerCIDRs {
_, ipnet, err := net.ParseCIDR(loadBalancerIPRange)
if err != nil {
return nil, fmt.Errorf("failed to get parse --loadbalancer-ip-range parameter: '%s'. Error: %s",
loadBalancerIPRange, err.Error())
}
npc.serviceLoadBalancerIPRanges = append(npc.serviceLoadBalancerIPRanges, *ipnet)
}
if config.MetricsEnabled {
// Register the metrics for this controller
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesSyncTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4SaveTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6SaveTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChainsSyncTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV4RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV6RestoreTime)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChains)
metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsets)
npc.MetricsEnabled = true
}
npc.syncPeriod = config.IPTablesSyncPeriod
node, err := utils.GetNodeObject(clientset, config.HostnameOverride)
if err != nil {
return nil, err
}
npc.nodeHostName = node.Name
nodeIPv4, nodeIPv6 := utils.GetAllNodeIPs(node)
npc.iptablesCmdHandlers = iptablesCmdHandlers
npc.iptablesSaveRestore = make(map[v1core.IPFamily]utils.IPTablesSaveRestorer, 2)
npc.filterTableRules = make(map[v1core.IPFamily]*bytes.Buffer, 2)
npc.ipSetHandlers = ipSetHandlers
npc.nodeIPs = make(map[v1core.IPFamily]net.IP, 2)
if config.EnableIPv4 {
npc.iptablesSaveRestore[v1core.IPv4Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv4Protocol)
var buf bytes.Buffer
npc.filterTableRules[v1core.IPv4Protocol] = &buf
// TODO: assuming that NPC should only use a single IP here is short-sighted, fix it so it considers all IPs
switch {
case len(nodeIPv4[v1core.NodeInternalIP]) > 0:
npc.nodeIPs[v1core.IPv4Protocol] = nodeIPv4[v1core.NodeInternalIP][0]
case len(nodeIPv4[v1core.NodeExternalIP]) > 0:
npc.nodeIPs[v1core.IPv4Protocol] = nodeIPv4[v1core.NodeExternalIP][0]
default:
return nil, fmt.Errorf("IPv4 was enabled but no IPv4 address was found on node")
}
}
if config.EnableIPv6 {
klog.V(2).Infof("IPv6 is enabled")
npc.iptablesSaveRestore[v1core.IPv6Protocol] = utils.NewIPTablesSaveRestore(v1core.IPv6Protocol)
var buf bytes.Buffer
npc.filterTableRules[v1core.IPv6Protocol] = &buf
// TODO: assuming that NPC should only use a single IP here is short-sighted, fix it so it considers all IPs
switch {
case len(nodeIPv6[v1core.NodeInternalIP]) > 0:
npc.nodeIPs[v1core.IPv6Protocol] = nodeIPv6[v1core.NodeInternalIP][0]
case len(nodeIPv6[v1core.NodeExternalIP]) > 0:
npc.nodeIPs[v1core.IPv6Protocol] = nodeIPv6[v1core.NodeExternalIP][0]
default:
return nil, fmt.Errorf("IPv6 was enabled but no IPv6 address was found on node")
}
}
npc.podLister = podInformer.GetIndexer()
npc.PodEventHandler = npc.newPodEventHandler()
npc.nsLister = nsInformer.GetIndexer()
npc.NamespaceEventHandler = npc.newNamespaceEventHandler()
npc.npLister = npInformer.GetIndexer()
npc.NetworkPolicyEventHandler = npc.newNetworkPolicyEventHandler()
return &npc, nil
}