kube-router/pkg/controllers/proxy/hairpin_controller.go
Aaron U'Ren 0f3714b9b7 fix(hairpin): set hairpin_mode for veth iface
It used to be that the kubelet handled setting hairpin mode for us:
https://github.com/kubernetes/kubernetes/pull/13628

Then this functionality moved to the dockershim:
https://github.com/kubernetes/kubernetes/pull/62212

Then the functionality was removed entirely:
https://github.com/kubernetes/kubernetes/commit/83265c9171f

Unfortunately, the fact that our hairpin implementation depended on this
behavior was lost along the way, if we ever knew it at all.
Additionally, I suspect that containerd and cri-o implementations never
worked correctly with hairpinning.

Without this, the NAT rules that we implement for hairpinning don't work
correctly. Because hairpin_mode isn't implemented on the virtual
interface of the container on the host, the packet bubbles up to the
kube-bridge. At some point in the traffic flow, the route back to the
pod gets resolved to the MAC address inside the container. At that
point, the packet's source MAC and destination MAC don't match the
kube-bridge interface and the packet is black-holed.

This can also be fixed by putting the kube-bridge interface into
promiscuous mode so that it accepts all mac addresses, but I think that
going back to the original functionality of enabling hairpin_mode on the
veth interface of the container is likely the lesser of two evils here
as putting the kube-bridge interface into promiscuous mode will likely
have unintentional consequences.
2023-12-07 12:44:51 -06:00

122 lines
3.9 KiB
Go

package proxy
import (
"fmt"
"net"
"os"
"path"
"runtime"
"sync"
"time"
"github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
"github.com/vishvananda/netns"
"k8s.io/klog/v2"
)
// hairpinController enables hairpin mode on pod interfaces in response to endpoint IPs delivered over a channel.
type hairpinController struct {
// epC is the receive-only channel of endpoint IPs (as strings) that Run consumes; each value received triggers
// an attempt to enable hairpin mode for that endpoint.
epC <-chan string
// nsc is the owning NetworkServicesController; it is used to resolve an endpoint IP to its container runtime
// references, the container PID, and the interface link ID.
nsc *NetworkServicesController
}
// Run is the hairpin controller's main loop. It blocks until stopCh is closed and, while running:
//   - attempts to enable hairpin mode for every endpoint IP received on the controller's endpoint channel
//   - sends an "HPC" heartbeat on healthChan each time the healthcheck.HPCSyncPeriod ticker fires
//
// A failure to enable hairpin mode for an endpoint is logged and does not stop the loop. wg.Done() is called when
// Run returns. If the endpoint channel is closed by its sender, Run stops listening on it (instead of spinning on
// zero-value receives) but keeps sending heartbeats until stopCh is closed.
func (hpc *hairpinController) Run(stopCh <-chan struct{}, wg *sync.WaitGroup,
	healthChan chan<- *healthcheck.ControllerHeartbeat) {
	defer wg.Done()
	klog.Infof("Starting hairping controller (handles setting hairpin_mode for veth interfaces)")

	t := time.NewTicker(healthcheck.HPCSyncPeriod)
	defer t.Stop()

	// Use a local copy of the channel so that it can be set to nil once closed. Receiving from a nil channel
	// blocks forever, which effectively removes that case from the select below without touching the struct.
	epC := hpc.epC

	for {
		// Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first
		select {
		case <-stopCh:
			klog.Info("Shutting down Hairpin Controller goroutine")
			return
		default:
		}

		select {
		case <-stopCh:
			klog.Info("Shutting down Hairpin Controller goroutine")
			return
		case endpointIP, ok := <-epC:
			if !ok {
				// A closed channel yields "" immediately on every receive; without this guard the loop would
				// busy-spin trying to configure an empty endpoint IP.
				klog.Warning("Hairpin endpoint channel was closed, no further hairpin requests will be processed")
				epC = nil
				continue
			}
			klog.V(1).Infof("Received request for hairpin setup of endpoint %s, processing", endpointIP)
			err := hpc.ensureHairpinEnabledForPodInterface(endpointIP)
			if err != nil {
				klog.Errorf("unable to set hairpin mode for endpoint %s, its possible that hairpinning will not "+
					"work as expected. Error was: %v",
					endpointIP, err)
			}
		case <-t.C:
			healthcheck.SendHeartBeat(healthChan, "HPC")
		}
	}
}
// ensureHairpinEnabledForPodInterface enables hairpin mode for the interface associated with the pod that owns
// endpointIP. The steps are:
//  1. resolve the container runtime and container ID for endpointIP
//  2. ask the runtime (docker, or otherwise a CRI endpoint) for the container's PID
//  3. find the interface link ID for that PID and resolve it to an interface with net.InterfaceByIndex
//  4. verify that the interface's hairpin file exists under sysFSVirtualNetPath and write hairpinEnable to it
//
// It returns nil on success. On failure it returns an error describing the step that failed; errors from the
// underlying calls are wrapped with %w so callers can inspect them with errors.Is / errors.As.
func (hpc *hairpinController) ensureHairpinEnabledForPodInterface(endpointIP string) error {
	klog.V(2).Infof("Attempting to enable hairpin mode for endpoint IP %s", endpointIP)
	crRuntime, containerID, err := hpc.nsc.findContainerRuntimeReferences(endpointIP)
	if err != nil {
		return err
	}
	klog.V(2).Infof("Detected runtime %s and container ID %s for endpoint IP %s", crRuntime, containerID, endpointIP)

	// NOTE(review): the namespace handle is only opened and closed in this function; presumably the thread lock
	// and handle exist to guard namespace switching performed by the helpers called below - confirm before
	// removing either.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	hostNetworkNSHandle, err := netns.Get()
	if err != nil {
		return fmt.Errorf("failed to get namespace due to %w", err)
	}
	defer utils.CloseCloserDisregardError(&hostNetworkNSHandle)

	var pid int
	if crRuntime == "docker" {
		// WARN: This method is deprecated and will be removed once docker-shim is removed from kubelet.
		pid, err = hpc.nsc.ln.getContainerPidWithDocker(containerID)
		if err != nil {
			return fmt.Errorf("failed to get container PID from docker for endpoint %s while enabling "+
				"hairpin mode: %w", endpointIP, err)
		}
	} else {
		// We expect CRI compliant runtimes here
		// ugly workaround, refactoring of pkg/Proxy is required
		pid, err = hpc.nsc.ln.getContainerPidWithCRI(hpc.nsc.dsr.runtimeEndpoint, containerID)
		if err != nil {
			return fmt.Errorf("failed to get container PID from CRI runtime for endpoint %s while enabling "+
				"hairpin mode: %w", endpointIP, err)
		}
	}
	klog.V(2).Infof("Found PID %d for endpoint IP %s", pid, endpointIP)

	// Get the interface link ID from inside the container so that we can link it to the veth on the host namespace
	ifaceID, err := hpc.nsc.ln.findIfaceLinkForPid(pid)
	if err != nil {
		return fmt.Errorf("failed to find the interface ID inside the container NS for endpoint IP: %s, due to: %w",
			endpointIP, err)
	}
	klog.V(2).Infof("Found Interface Link ID %d for endpoint IP %s", ifaceID, endpointIP)

	iface, err := net.InterfaceByIndex(ifaceID)
	if err != nil {
		return fmt.Errorf("failed to get the interface name from the link ID inside the container for endpoint IP: "+
			"%s and Interface ID: %d due to: %w", endpointIP, ifaceID, err)
	}

	klog.V(1).Infof("Enabling hairpin for interface %s for endpoint IP %s", iface.Name, endpointIP)
	hpPath := path.Join(sysFSVirtualNetPath, iface.Name, sysFSHairpinRelPath)
	// Check up front so that a missing hairpin file is reported clearly; the underlying stat error is kept so
	// that causes other than "not found" (e.g. permissions) are not hidden.
	if _, err := os.Stat(hpPath); err != nil {
		return fmt.Errorf("hairpin path %s doesn't appear to exist for us to set: %w", hpPath, err)
	}

	if err := os.WriteFile(hpPath, []byte(hairpinEnable), 0644); err != nil {
		return fmt.Errorf("failed to enable hairpin mode via %s for endpoint IP %s: %w", hpPath, endpointIP, err)
	}
	return nil
}
// NewHairpinController returns a hairpinController wired to the given NetworkServicesController and reading
// endpoint IPs from endpointCh. The controller does nothing until its Run method is started.
func NewHairpinController(nsc *NetworkServicesController, endpointCh <-chan string) *hairpinController {
	return &hairpinController{
		epC: endpointCh,
		nsc: nsc,
	}
}