mirror of
https://github.com/tailscale/tailscale.git
synced 2025-12-23 10:12:07 +01:00
net/udprelay: XDP PoC
do not merge Updates tailscale/corp#34849 Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
parent
6a44990b09
commit
c3f9d1c22e
@ -25,6 +25,7 @@ import (
|
||||
"golang.org/x/crypto/blake2s"
|
||||
"golang.org/x/net/ipv6"
|
||||
"tailscale.com/disco"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/net/batching"
|
||||
"tailscale.com/net/netaddr"
|
||||
"tailscale.com/net/netcheck"
|
||||
@ -34,6 +35,7 @@ import (
|
||||
"tailscale.com/net/stun"
|
||||
"tailscale.com/net/udprelay/endpoint"
|
||||
"tailscale.com/net/udprelay/status"
|
||||
"tailscale.com/net/udprelay/xdp"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstime"
|
||||
"tailscale.com/types/key"
|
||||
@ -75,6 +77,7 @@ type Server struct {
|
||||
wg sync.WaitGroup
|
||||
closeCh chan struct{}
|
||||
netChecker *netcheck.Client
|
||||
fib xdp.FIB
|
||||
|
||||
mu sync.Mutex // guards the following fields
|
||||
macSecrets [][blake2s.Size]byte // [0] is most recent, max 2 elements
|
||||
@ -140,7 +143,7 @@ func blakeMACFromBindMsg(blakeKey [blake2s.Size]byte, src netip.AddrPort, msg di
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
|
||||
func (e *serverEndpoint) handleDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
|
||||
if senderIndex != 0 && senderIndex != 1 {
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
@ -218,6 +221,12 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
|
||||
e.boundAddrPorts[senderIndex] = from
|
||||
e.lastSeen[senderIndex] = time.Now() // record last seen as bound time
|
||||
e.inProgressGeneration[senderIndex] = 0 // reset to zero, which indicates there is no in-progress handshake
|
||||
if fib != nil && e.boundAddrPorts[0].IsValid() && e.boundAddrPorts[1].IsValid() {
|
||||
err = fib.Upsert(e.vni, e.boundAddrPorts)
|
||||
if err != nil {
|
||||
logf("error upserting fib: %v", err)
|
||||
}
|
||||
}
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
}
|
||||
@ -229,7 +238,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
|
||||
}
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
|
||||
func (e *serverEndpoint) handleSealedDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
|
||||
senderRaw, isDiscoMsg := disco.Source(b)
|
||||
if !isDiscoMsg {
|
||||
// Not a Disco message
|
||||
@ -260,7 +269,7 @@ func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []by
|
||||
return nil, netip.AddrPort{}
|
||||
}
|
||||
|
||||
return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets)
|
||||
return e.handleDiscoControlMsg(logf, fib, from, senderIndex, discoMsg, serverDisco, macSecrets)
|
||||
}
|
||||
|
||||
func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now time.Time) (write []byte, to netip.AddrPort) {
|
||||
@ -323,6 +332,17 @@ func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (s *Serv
|
||||
byVNI: make(map[uint32]*serverEndpoint),
|
||||
}
|
||||
s.discoPublic = s.disco.Public()
|
||||
xdpDev := envknob.String("TS_PEER_RELAY_XDP_DEVICE")
|
||||
if xdpDev != "" {
|
||||
s.fib, err = xdp.NewFIB(&xdp.FIBConfig{
|
||||
DstPort: port,
|
||||
DeviceName: xdpDev,
|
||||
})
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO(creachadair): Find a way to plumb this in during initialization.
|
||||
// As-written, messages published here will not be seen by other components
|
||||
@ -547,11 +567,11 @@ func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
|
||||
func (s *Server) bindSockets(desiredPort uint16) error {
|
||||
// maxSocketsPerAF is a conservative starting point, but is somewhat
|
||||
// arbitrary.
|
||||
maxSocketsPerAF := min(16, runtime.NumCPU())
|
||||
maxSocketsPerAF := min(128, runtime.NumCPU())
|
||||
listenConfig := &net.ListenConfig{
|
||||
Control: listenControl,
|
||||
}
|
||||
for _, network := range []string{"udp4", "udp6"} {
|
||||
for _, network := range []string{"udp4"} { //, "udp6"} {
|
||||
SocketsLoop:
|
||||
for i := range maxSocketsPerAF {
|
||||
if i > 0 {
|
||||
@ -626,6 +646,9 @@ func (s *Server) bindSocketTo(listenConfig *net.ListenConfig, network string, po
|
||||
// Close closes the server.
|
||||
func (s *Server) Close() error {
|
||||
s.closeOnce.Do(func() {
|
||||
if s.fib != nil {
|
||||
s.fib.Close()
|
||||
}
|
||||
for _, uc4 := range s.uc4 {
|
||||
uc4.Close()
|
||||
}
|
||||
@ -662,6 +685,15 @@ func (s *Server) endpointGCLoop() {
|
||||
if v.isExpired(now, s.bindLifetime, s.steadyStateLifetime) {
|
||||
delete(s.byDisco, k)
|
||||
delete(s.byVNI, v.vni)
|
||||
// TODO: isExpired only considers userspace counters/liveliness
|
||||
// TODO: this is a syscall per VNI to delete while holding s.mu,
|
||||
// consider batch delete
|
||||
if s.fib != nil {
|
||||
err := s.fib.Delete(v.vni)
|
||||
if err != nil {
|
||||
s.logf("failed to delete fib entry: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -708,7 +740,7 @@ func (s *Server) handlePacket(from netip.AddrPort, b []byte) (write []byte, to n
|
||||
}
|
||||
msg := b[packet.GeneveFixedHeaderLength:]
|
||||
s.maybeRotateMACSecretLocked(now)
|
||||
return e.handleSealedDiscoControlMsg(from, msg, s.discoPublic, s.macSecrets)
|
||||
return e.handleSealedDiscoControlMsg(s.logf, s.fib, from, msg, s.discoPublic, s.macSecrets)
|
||||
}
|
||||
return e.handleDataPacket(from, b, now)
|
||||
}
|
||||
|
||||
131
net/udprelay/xdp/bpf_bpfeb.go
Normal file
131
net/udprelay/xdp/bpf_bpfeb.go
Normal file
@ -0,0 +1,131 @@
|
||||
// Code generated by bpf2go; DO NOT EDIT.
|
||||
//go:build mips || mips64 || ppc64 || s390x
|
||||
|
||||
package xdp
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
)
|
||||
|
||||
type bpfConfig struct{ DstPort uint16 }
|
||||
|
||||
type bpfEndpoint struct {
|
||||
ParticipantAddrs [2][4]uint32
|
||||
ParticipantPorts [2]uint16
|
||||
ParticipantIsIpv6 [2]uint8
|
||||
_ [2]byte
|
||||
}
|
||||
|
||||
// loadBpf returns the embedded CollectionSpec for bpf.
|
||||
func loadBpf() (*ebpf.CollectionSpec, error) {
|
||||
reader := bytes.NewReader(_BpfBytes)
|
||||
spec, err := ebpf.LoadCollectionSpecFromReader(reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't load bpf: %w", err)
|
||||
}
|
||||
|
||||
return spec, err
|
||||
}
|
||||
|
||||
// loadBpfObjects loads bpf and converts it into a struct.
|
||||
//
|
||||
// The following types are suitable as obj argument:
|
||||
//
|
||||
// *bpfObjects
|
||||
// *bpfPrograms
|
||||
// *bpfMaps
|
||||
//
|
||||
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
|
||||
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
|
||||
spec, err := loadBpf()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return spec.LoadAndAssign(obj, opts)
|
||||
}
|
||||
|
||||
// bpfSpecs contains maps and programs before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfSpecs struct {
|
||||
bpfProgramSpecs
|
||||
bpfMapSpecs
|
||||
}
|
||||
|
||||
// bpfSpecs contains programs before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfProgramSpecs struct {
|
||||
XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"`
|
||||
}
|
||||
|
||||
// bpfMapSpecs contains maps before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfMapSpecs struct {
|
||||
ConfigMap *ebpf.MapSpec `ebpf:"config_map"`
|
||||
EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"`
|
||||
}
|
||||
|
||||
// bpfObjects contains all objects after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfObjects struct {
|
||||
bpfPrograms
|
||||
bpfMaps
|
||||
}
|
||||
|
||||
func (o *bpfObjects) Close() error {
|
||||
return _BpfClose(
|
||||
&o.bpfPrograms,
|
||||
&o.bpfMaps,
|
||||
)
|
||||
}
|
||||
|
||||
// bpfMaps contains all maps after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfMaps struct {
|
||||
ConfigMap *ebpf.Map `ebpf:"config_map"`
|
||||
EndpointMap *ebpf.Map `ebpf:"endpoint_map"`
|
||||
}
|
||||
|
||||
func (m *bpfMaps) Close() error {
|
||||
return _BpfClose(
|
||||
m.ConfigMap,
|
||||
m.EndpointMap,
|
||||
)
|
||||
}
|
||||
|
||||
// bpfPrograms contains all programs after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfPrograms struct {
|
||||
XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"`
|
||||
}
|
||||
|
||||
func (p *bpfPrograms) Close() error {
|
||||
return _BpfClose(
|
||||
p.XdpProgFunc,
|
||||
)
|
||||
}
|
||||
|
||||
func _BpfClose(closers ...io.Closer) error {
|
||||
for _, closer := range closers {
|
||||
if err := closer.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do not access this directly.
|
||||
//
|
||||
//go:embed bpf_bpfeb.o
|
||||
var _BpfBytes []byte
|
||||
BIN
net/udprelay/xdp/bpf_bpfeb.o
Normal file
BIN
net/udprelay/xdp/bpf_bpfeb.o
Normal file
Binary file not shown.
131
net/udprelay/xdp/bpf_bpfel.go
Normal file
131
net/udprelay/xdp/bpf_bpfel.go
Normal file
@ -0,0 +1,131 @@
|
||||
// Code generated by bpf2go; DO NOT EDIT.
|
||||
//go:build 386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64
|
||||
|
||||
package xdp
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
)
|
||||
|
||||
type bpfConfig struct{ DstPort uint16 }
|
||||
|
||||
type bpfEndpoint struct {
|
||||
ParticipantAddrs [2][4]uint32
|
||||
ParticipantPorts [2]uint16
|
||||
ParticipantIsIpv6 [2]uint8
|
||||
_ [2]byte
|
||||
}
|
||||
|
||||
// loadBpf returns the embedded CollectionSpec for bpf.
|
||||
func loadBpf() (*ebpf.CollectionSpec, error) {
|
||||
reader := bytes.NewReader(_BpfBytes)
|
||||
spec, err := ebpf.LoadCollectionSpecFromReader(reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't load bpf: %w", err)
|
||||
}
|
||||
|
||||
return spec, err
|
||||
}
|
||||
|
||||
// loadBpfObjects loads bpf and converts it into a struct.
|
||||
//
|
||||
// The following types are suitable as obj argument:
|
||||
//
|
||||
// *bpfObjects
|
||||
// *bpfPrograms
|
||||
// *bpfMaps
|
||||
//
|
||||
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
|
||||
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
|
||||
spec, err := loadBpf()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return spec.LoadAndAssign(obj, opts)
|
||||
}
|
||||
|
||||
// bpfSpecs contains maps and programs before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfSpecs struct {
|
||||
bpfProgramSpecs
|
||||
bpfMapSpecs
|
||||
}
|
||||
|
||||
// bpfSpecs contains programs before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfProgramSpecs struct {
|
||||
XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"`
|
||||
}
|
||||
|
||||
// bpfMapSpecs contains maps before they are loaded into the kernel.
|
||||
//
|
||||
// It can be passed ebpf.CollectionSpec.Assign.
|
||||
type bpfMapSpecs struct {
|
||||
ConfigMap *ebpf.MapSpec `ebpf:"config_map"`
|
||||
EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"`
|
||||
}
|
||||
|
||||
// bpfObjects contains all objects after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfObjects struct {
|
||||
bpfPrograms
|
||||
bpfMaps
|
||||
}
|
||||
|
||||
func (o *bpfObjects) Close() error {
|
||||
return _BpfClose(
|
||||
&o.bpfPrograms,
|
||||
&o.bpfMaps,
|
||||
)
|
||||
}
|
||||
|
||||
// bpfMaps contains all maps after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfMaps struct {
|
||||
ConfigMap *ebpf.Map `ebpf:"config_map"`
|
||||
EndpointMap *ebpf.Map `ebpf:"endpoint_map"`
|
||||
}
|
||||
|
||||
func (m *bpfMaps) Close() error {
|
||||
return _BpfClose(
|
||||
m.ConfigMap,
|
||||
m.EndpointMap,
|
||||
)
|
||||
}
|
||||
|
||||
// bpfPrograms contains all programs after they have been loaded into the kernel.
|
||||
//
|
||||
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
|
||||
type bpfPrograms struct {
|
||||
XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"`
|
||||
}
|
||||
|
||||
func (p *bpfPrograms) Close() error {
|
||||
return _BpfClose(
|
||||
p.XdpProgFunc,
|
||||
)
|
||||
}
|
||||
|
||||
func _BpfClose(closers ...io.Closer) error {
|
||||
for _, closer := range closers {
|
||||
if err := closer.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do not access this directly.
|
||||
//
|
||||
//go:embed bpf_bpfel.o
|
||||
var _BpfBytes []byte
|
||||
BIN
net/udprelay/xdp/bpf_bpfel.o
Normal file
BIN
net/udprelay/xdp/bpf_bpfel.o
Normal file
Binary file not shown.
350
net/udprelay/xdp/xdp.c
Normal file
350
net/udprelay/xdp/xdp.c
Normal file
@ -0,0 +1,350 @@
|
||||
//go:build ignore
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/udp.h>
|
||||
#include <bpf_endian.h>
|
||||
#include <bpf_helpers.h>
|
||||
|
||||
char _license[4] SEC("license") = "GPL";
|
||||
|
||||
// config carries the runtime settings that userspace stores in config_map for
// the XDP program to read.
struct config {
	// dst_port is compared, in host byte order, against the UDP destination
	// port of each ingress datagram; datagrams for any other port are passed
	// up the stack untouched.
	__u16 dst_port;
};
|
||||
struct config *unused_config __attribute__((unused)); // required by bpf2go -type
|
||||
|
||||
// config_map is a single-entry array map. The XDP program looks up key 0 and
// interprets the value as a struct config.
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct config));
	__uint(max_entries, 1);
} config_map SEC(".maps");
|
||||
|
||||
// endpoint describes the two participants of one relay endpoint. Index i of
// every array refers to the same participant.
struct endpoint {
	// participant_addrs holds each participant address as four 32-bit words in
	// network byte order. The IPv4 path of xdp_prog_func only compares word [3]
	// against the packet's IPv4 source address.
	__be32 participant_addrs[2][4];
	// participant_ports holds each participant UDP port in host byte order; it
	// is compared against bpf_ntohs(udp->source).
	__u16 participant_ports[2];
	// participant_is_ipv6 is 0 for an IPv4 participant and nonzero otherwise.
	__u8 participant_is_ipv6[2];
};
|
||||
struct endpoint *unused_endpoint __attribute__((unused)); // required by bpf2go -type
|
||||
|
||||
// MAX_GENEVE_VNI is the largest value representable in the 24-bit Geneve VNI
// field. The expansion is fully parenthesized so the macro can be embedded in
// a larger expression without operator-precedence surprises.
#define MAX_GENEVE_VNI ((1 << 24) - 1)
|
||||
|
||||
// endpoint_map is a per-CPU hash map from Geneve VNI to the struct endpoint
// describing the participants relayed under that VNI. xdp_prog_func looks
// packets up here after extracting the VNI from the Geneve header.
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
	__uint(key_size, sizeof(__u32)); // key is Geneve VNI
	__uint(value_size, sizeof(struct endpoint));
	__uint(max_entries, MAX_GENEVE_VNI);
} endpoint_map SEC(".maps");
|
||||
|
||||
#define MAX_UDP_LEN_IPV4 1480
|
||||
|
||||
#define MAX_UDP_LEN_IPV6 1460
|
||||
|
||||
#define IP_MF 0x2000
|
||||
#define IP_OFFSET 0x1fff
|
||||
|
||||
/*
Geneve Header:
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |Ver|  Opt Len  |O|C|    Rsvd.  |          Protocol Type        |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |        Virtual Network Identifier (VNI)       |    Reserved   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                                                               |
   ~                    Variable-Length Options                    ~
   |                                                               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
|
||||
// geneve_header is the fixed 8-byte portion of a Geneve header (no options).
struct geneve_header {
	__u8 first;      // version (2 bits) followed by option length (6 bits)
	__u8 second;     // control bit, critical bit, then 6 reserved bits
	__be16 protocol; // protocol type, network byte order
	__be32 vni;      // 24-bit VNI followed by 8 reserved bits, network byte order
};
|
||||
|
||||
// csum_fold folds a 32-bit one's-complement accumulator down to 16 bits by
// adding the carry bits back into the low half twice, then truncating.
static __always_inline __u16 csum_fold(__u32 csum) {
	__u32 folded = (csum & 0xffff) + (csum >> 16); // at most 0x1fffe
	folded += folded >> 16;                        // absorb the remaining carry
	return (__u16)folded;
}
|
||||
|
||||
// csum_fold_flip folds a 32-bit one's-complement accumulator down to 16 bits
// and returns the bitwise complement of the result, i.e. the value that is
// stored in (or expected to be zero when verifying) an Internet checksum field.
static __always_inline __u16 csum_fold_flip(__u32 csum) {
	__u32 low = csum & 0xffff;
	__u32 high = csum >> 16;
	__u32 folded = high + low; // at most 0x1fffe
	folded += folded >> 16;    // absorb the remaining carry
	return (__u16)~folded;
}
|
||||
|
||||
// pseudo_sum_ipv6 returns the unfolded one's-complement sum of the IPv6
// pseudo-header fields used for the UDP checksum: the source and destination
// addresses (as sixteen 16-bit words), the next-header value, and udp_len.
//
// udp_len is added as-is; the caller in this file passes udp->len without
// byte-swapping it, matching the unswapped 16-bit words read from the packet.
static __always_inline __u32 pseudo_sum_ipv6(struct ipv6hdr* ip6, __u16 udp_len) {
	__u32 pseudo = 0; // TODO(jwhited): __u64 for intermediate checksum values to reduce number of ops
	// Each address is 128 bits, i.e. eight 16-bit words.
	for (int i = 0; i < 8; i ++) {
		pseudo += ip6->saddr.in6_u.u6_addr16[i];
		pseudo += ip6->daddr.in6_u.u6_addr16[i];
	}
	// nexthdr is a single byte; bpf_htons places it where it would sit in a
	// network-order 16-bit word.
	pseudo += bpf_htons(ip6->nexthdr);
	pseudo += udp_len;
	return pseudo;
}
|
||||
|
||||
// pseudo_sum_ipv4 returns the unfolded one's-complement sum of the IPv4
// pseudo-header fields used for the UDP checksum: both halves of the source
// and destination addresses, the protocol number, and udp_len (added as
// supplied by the caller).
static __always_inline __u32 pseudo_sum_ipv4(struct iphdr* ip, __u16 udp_len) {
	__u32 src = ip->saddr;
	__u32 dst = ip->daddr;

	__u32 acc = (src & 0xffff) + (src >> 16);
	acc += (dst & 0xffff) + (dst >> 16);
	// protocol is a single byte; bpf_htons places it where it would sit in a
	// network-order 16-bit word.
	acc += bpf_htons(ip->protocol);
	acc += udp_len;
	return acc;
}
|
||||
|
||||
// csum_const_size is an alternative to bpf_csum_diff. It's a verifier
|
||||
// workaround for when we are forced to use a constant max_size + bounds
|
||||
// checking. The alternative being passing a dynamic length to bpf_csum_diff
|
||||
// {from,to}_size arguments, which the verifier can't follow. For further info
|
||||
// see: https://github.com/iovisor/bcc/issues/2463#issuecomment-512503958
|
||||
// csum_const_size sums the bytes from `from` up to data_end as 16-bit words on
// top of seed, then returns the folded, complemented result. max_size is a
// compile-time upper bound on the number of bytes visited so the loop has a
// constant trip count; the per-iteration data_end comparison is what actually
// stops the walk at the end of the packet.
static __always_inline __u16 csum_const_size(__u32 seed, void* from, void* data_end, int max_size) {
	__u16 *buf = from;
	for (int i = 0; i < max_size; i += 2) {
		// Stop once a full 16-bit word no longer fits before data_end.
		if ((void *)(buf + 1) > data_end) {
			break;
		}
		seed += *buf;
		buf++;
	}
	// If a single byte remains before data_end, add it as the low-order byte.
	// NOTE(review): this matches the in-memory position of a trailing byte on a
	// little-endian host; behaviour on big-endian targets should be confirmed.
	if ((void *)buf + 1 <= data_end) {
		seed += *(__u8 *)buf;
	}
	return csum_fold_flip(seed);
}
|
||||
|
||||
// xdp_prog_func is the XDP entry point. It fast-paths Geneve-encapsulated UDP
// data packets between the two participants of a relay endpoint:
//
//  1. Parse and validate Ethernet, IPv4/IPv6 and UDP headers (including the
//     IPv4 header checksum and, when present, the UDP checksum).
//  2. Require the UDP destination port to equal config.dst_port.
//  3. Require a plain Geneve data header (version 0, no options, no control or
//     critical bits, reserved bits zero) and look its VNI up in endpoint_map.
//  4. If the packet source matches one participant, rewrite the addresses and
//     ports so the packet is sent to the other participant and return XDP_TX.
//
// Anything that does not match is returned to the kernel with XDP_PASS so the
// userspace relay can handle it. Only the IPv4-in/IPv4-out path is implemented.
SEC("xdp")
int xdp_prog_func(struct xdp_md *ctx) {
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;

	struct ethhdr *eth = data;
	if ((void *)(eth + 1) > data_end) {
		return XDP_PASS;
	}

	struct iphdr *ip;
	struct ipv6hdr *ip6;
	struct udphdr *udp;

	int validate_udp_csum = 0;
	int is_ipv6 = 0;
	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
		ip = (void *)(eth + 1);
		if ((void *)(ip + 1) > data_end) {
			return XDP_PASS;
		}

		// Only option-less, unfragmented IPv4 UDP is handled here.
		if (ip->ihl != 5 ||
			ip->version != 4 ||
			ip->protocol != IPPROTO_UDP ||
			(ip->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0) {
			return XDP_PASS;
		}

		// validate ipv4 header checksum
		__u32 cs_unfolded = bpf_csum_diff(0, 0, (void *)ip, sizeof(*ip), 0);
		__u16 cs = csum_fold_flip(cs_unfolded);
		if (cs != 0) {
			return XDP_PASS;
		}

		if (bpf_ntohs(ip->tot_len) != data_end - (void *)ip) {
			return XDP_PASS;
		}

		udp = (void *)(ip + 1);
		if ((void *)(udp + 1) > data_end) {
			return XDP_PASS;
		}

		if (udp->check != 0) {
			// https://datatracker.ietf.org/doc/html/rfc768#page-3
			// If the computed checksum is zero, it is transmitted as all
			// ones (the equivalent in one's complement arithmetic). An all
			// zero transmitted checksum value means that the transmitter
			// generated no checksum (for debugging or for higher level
			// protocols that don't care).
			validate_udp_csum = 1;
		}
	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
		ip6 = (void *)(eth + 1);
		if ((void *)(ip6 + 1) > data_end) {
			return XDP_PASS;
		}

		if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP) {
			return XDP_PASS;
		}

		udp = (void *)(ip6 + 1);
		if ((void *)(udp + 1) > data_end) {
			return XDP_PASS;
		}

		if (bpf_ntohs(ip6->payload_len) != data_end - (void *)udp) {
			return XDP_PASS;
		}

		// https://datatracker.ietf.org/doc/html/rfc8200#page-28
		// Unlike IPv4, the default behavior when UDP packets are
		// originated by an IPv6 node is that the UDP checksum is not
		// optional. That is, whenever originating a UDP packet, an IPv6
		// node must compute a UDP checksum over the packet and the
		// pseudo-header, and, if that computation yields a result of
		// zero, it must be changed to hex FFFF for placement in the UDP
		// header. IPv6 receivers must discard UDP packets containing a
		// zero checksum and should log the error.
		validate_udp_csum = 1;
		is_ipv6 = 1;
	} else {
		return XDP_PASS;
	}

	__u32 config_key = 0;
	struct config *c = bpf_map_lookup_elem(&config_map, &config_key);
	if (!c) {
		return XDP_PASS;
	}

	if (bpf_ntohs(udp->len) != data_end - (void *)udp) {
		return XDP_PASS;
	}

	if (bpf_ntohs(udp->dest) != c->dst_port) {
		return XDP_PASS;
	}

	if (validate_udp_csum) {
		__u16 cs;
		__u32 pseudo_sum;
		if (is_ipv6) {
			pseudo_sum = pseudo_sum_ipv6(ip6, udp->len);
			cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV6);
		} else {
			pseudo_sum = pseudo_sum_ipv4(ip, udp->len);
			cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV4);
		}
		if (cs != 0) {
			return XDP_PASS;
		}
	}

	struct geneve_header *geneve = (void *)(udp + 1);
	if ((void *)(geneve +1) > data_end) {
		return XDP_PASS;
	}

	if (geneve->first != 0) {
		// first 2 bits are version, must be zero
		// next 6 bits are opt len, must be zero
		return XDP_PASS;
	}

	if (geneve->second != 0) {
		// first bit is control, must be zero
		// next bit is critical (options), must be zero
		// next 6 bits are reserved, must be zero
		return XDP_PASS;
	}

	// geneve->vni is still in network byte order, so the mask must be
	// converted too; otherwise on little-endian hosts the test would hit the
	// most significant VNI byte instead of the trailing reserved byte.
	if ((geneve->vni & bpf_htonl(0x000000FF)) != 0) {
		// last byte is reserved, must be zero
		return XDP_PASS;
	}

	__u32 vni_key = bpf_ntohl(geneve->vni) >> 8;
	struct endpoint *e = bpf_map_lookup_elem(&endpoint_map, &vni_key);
	if (!e) {
		return XDP_PASS;
	}

	// Identify which participant sent the packet; the other one is the
	// destination.
	int out_participant_index = -1; // -1 = unmatched
	if (is_ipv6) {
		// TODO
	} else {
		for (int i = 0; i < 2; i ++) {
			if (e->participant_is_ipv6[i] == 0 &&
				e->participant_addrs[i][3] == ip->saddr &&
				e->participant_ports[i] == bpf_ntohs(udp->source))
			{
				if (i == 0) {
					out_participant_index = 1;
				} else {
					out_participant_index = 0;
				}
				break;
			}
		}
	}
	if (out_participant_index == -1) {
		return XDP_PASS;
	}

	if (e->participant_is_ipv6[out_participant_index] == is_ipv6) {
		// matching in/out address family
		if (is_ipv6) {
			// TODO: in ipv6, out ipv6
		} else {
			// in ipv4, out ipv4

			// Update IPv4 header. The old destination becomes the new source
			// and the peer participant becomes the new destination, so the
			// net checksum change is "old source replaced by p_addr". The same
			// delta applies to the UDP pseudo-header.
			__be32 p_addr = e->participant_addrs[out_participant_index][3];
			__u32 ip_csum = ~(__u32)ip->check;
			__u32 udp_csum = ~(__u32)udp->check;
			ip_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, ip_csum);
			udp_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, udp_csum);
			ip->check = csum_fold_flip(ip_csum);
			ip->saddr = ip->daddr;
			ip->daddr = p_addr;

			#define AF_INET 2
			struct bpf_fib_lookup fib_params = {};
			fib_params.family = AF_INET;
			fib_params.tos = ip->tos;
			fib_params.l4_protocol = ip->protocol;
			fib_params.sport = 0;
			fib_params.dport = 0;
			fib_params.tot_len = bpf_ntohs(ip->tot_len);
			fib_params.ipv4_src = ip->saddr;
			fib_params.ipv4_dst = ip->daddr;
			fib_params.ifindex = ctx->ingress_ifindex;

			int rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), BPF_FIB_LOOKUP_DIRECT);
			if (rc != BPF_FIB_LKUP_RET_SUCCESS) {
				// NOTE(review): the IP header has already been rewritten at
				// this point, so the packet is not handed back to the kernel.
				return XDP_ABORTED;
			}

			// Rewrite ethernet header source and destination address.
			__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
			__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);

			// Update UDP header. The old destination port becomes the new
			// source port, so the net change is "old source port replaced by
			// the peer participant's port".
			__u32 old_udp_port = (__u32)udp->source;
			__u32 new_udp_port = (__u32)bpf_htons(e->participant_ports[out_participant_index]);
			udp_csum = bpf_csum_diff(&old_udp_port, 4, &new_udp_port, 4, udp_csum);
			if (validate_udp_csum) {
				// Only datagrams that arrived with a checksum get an updated
				// one; an all-zero IPv4 UDP checksum means "no checksum" and
				// must stay zero. A recomputed value of zero is transmitted as
				// all ones per RFC 768.
				__u16 new_udp_check = csum_fold_flip(udp_csum);
				if (new_udp_check == 0) {
					new_udp_check = 0xffff;
				}
				udp->check = new_udp_check;
			}
			udp->source = udp->dest;
			udp->dest = bpf_htons(e->participant_ports[out_participant_index]);
			udp = (void *)(ip + 1);
			if ((void *)(udp +1) > data_end) {
				return XDP_ABORTED;
			}

			return XDP_TX;
		}
	} else if (e->participant_is_ipv6[out_participant_index] == 0) {
		// TODO: in ipv6, out ipv4
	} else {
		// TODO: in ipv4, out ipv6
	}

	return XDP_PASS;
}
|
||||
48
net/udprelay/xdp/xdp.go
Normal file
48
net/udprelay/xdp/xdp.go
Normal file
@ -0,0 +1,48 @@
|
||||
package xdp
|
||||
|
||||
import "net/netip"
|
||||
|
||||
// XDPAttachFlags represents how an XDP program will be attached to an
// interface. It mirrors cilium/ebpf/link.XDPAttachFlags so that package does
// not have to be imported on non-Linux platforms.
type XDPAttachFlags uint32

const (
	// XDPDriverFallbackGenericMode attempts XDPDriverMode, and falls back to
	// XDPGenericMode if the driver does not support XDP. It is the zero value
	// of XDPAttachFlags.
	XDPDriverFallbackGenericMode = 0
)

const (
	// XDPGenericMode (SKB) links XDP BPF program for drivers which do
	// not yet support native XDP.
	XDPGenericMode XDPAttachFlags = 1 << (iota + 1)
	// XDPDriverMode links XDP BPF program into the driver's receive path.
	XDPDriverMode
	// XDPOffloadMode offloads the entire XDP BPF program into hardware.
	XDPOffloadMode
)
|
||||
|
||||
// FIBConfig holds the settings used by NewFIB to construct a FIB.
type FIBConfig struct {
	// DeviceName is the name of the network interface the XDP program is
	// attached to.
	DeviceName string
	// DstPort is the UDP destination port the XDP program matches on.
	//
	// TODO: DstPort is singular, but udp4 and udp6 can be independent ports if
	// the user supplied a zero port value.
	DstPort uint16
	// AttachFlags selects how the XDP program is attached to the device.
	AttachFlags XDPAttachFlags
}
|
||||
|
||||
// validate reports whether f is usable. No constraints are enforced yet, so
// it always returns nil.
func (f FIBConfig) validate() error {
	return nil
}
|
||||
|
||||
// FIBOption is an optional setting passed to NewFIB. Its only method is
// unexported, so implementations can only be defined inside this package.
type FIBOption interface {
	// apply records the option's effect on the given options struct.
	apply(*fibOptions)
}
|
||||
|
||||
// fibOptions accumulates the settings applied by the FIBOption values passed
// to NewFIB.
type fibOptions struct {
	// noAttach, when true, makes NewFIB load the program and maps but skip
	// attaching the program to a network device.
	noAttach bool
}
|
||||
|
||||
// FIB is the forwarding table consulted by the XDP fast path. Entries are
// keyed by Geneve VNI and describe the two participants relayed under it.
type FIB interface {
	// Delete removes the entry for vni.
	Delete(vni uint32) error
	// Upsert inserts or replaces the entry for vni with the given pair of
	// participant addresses.
	Upsert(vni uint32, participants [2]netip.AddrPort) error
	// Close releases the resources held by the FIB.
	Close() error
}
|
||||
103
net/udprelay/xdp/xdp_linux.go
Normal file
103
net/udprelay/xdp/xdp_linux.go
Normal file
@ -0,0 +1,103 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build linux
|
||||
|
||||
package xdp
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/link"
|
||||
)
|
||||
|
||||
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type config -type endpoint bpf xdp.c -- -I ../../../derp/xdp/headers
|
||||
|
||||
func NewFIB(config *FIBConfig, opts ...FIBOption) (FIB, error) {
|
||||
o := &fibOptions{}
|
||||
for _, opt := range opts {
|
||||
opt.apply(o)
|
||||
}
|
||||
err := config.validate()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid config: %v", err)
|
||||
}
|
||||
objs := new(bpfObjects)
|
||||
err = loadBpfObjects(objs, nil)
|
||||
if err != nil {
|
||||
var ve *ebpf.VerifierError
|
||||
if errors.As(err, &ve) {
|
||||
err = fmt.Errorf("verifier error: %+v", ve)
|
||||
}
|
||||
return nil, fmt.Errorf("error loading XDP program: %w", err)
|
||||
}
|
||||
f := &linuxFIB{
|
||||
objs: objs,
|
||||
dstPort: config.DstPort,
|
||||
}
|
||||
var key uint32
|
||||
xdpConfig := &bpfConfig{
|
||||
DstPort: config.DstPort,
|
||||
}
|
||||
err = objs.ConfigMap.Put(key, xdpConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading config in eBPF map: %w", err)
|
||||
}
|
||||
if o.noAttach {
|
||||
return f, nil
|
||||
}
|
||||
iface, err := net.InterfaceByName(config.DeviceName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error finding device: %w", err)
|
||||
}
|
||||
link, err := link.AttachXDP(link.XDPOptions{
|
||||
Program: objs.XdpProgFunc,
|
||||
Interface: iface.Index,
|
||||
Flags: link.XDPAttachFlags(config.AttachFlags),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error attaching XDP program to dev: %w", err)
|
||||
}
|
||||
f.link = link
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// linuxFIB is the FIB implementation backed by the eBPF maps of the loaded
// XDP program.
type linuxFIB struct {
	// objs holds the loaded eBPF programs and maps.
	objs *bpfObjects
	// dstPort is the UDP destination port that was written to the config map.
	dstPort uint16
	// link is the XDP attachment to the network device. It is nil when NewFIB
	// was asked not to attach.
	link link.Link
}
|
||||
|
||||
func (l *linuxFIB) Delete(vni uint32) error {
|
||||
return l.objs.EndpointMap.Delete(&vni)
|
||||
}
|
||||
|
||||
func (l *linuxFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error {
|
||||
endpoint := bpfEndpoint{}
|
||||
for i, participant := range participants {
|
||||
as16 := participant.Addr().As16()
|
||||
for j := 0; j < 4; j++ {
|
||||
endpoint.ParticipantAddrs[i][j] = binary.NativeEndian.Uint32(as16[j*4:])
|
||||
}
|
||||
endpoint.ParticipantPorts[i] = participant.Port()
|
||||
if participant.Addr().Is6() {
|
||||
endpoint.ParticipantIsIpv6[i] = 1
|
||||
}
|
||||
}
|
||||
numCPU, err := ebpf.PossibleCPU()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vals := make([]bpfEndpoint, numCPU)
|
||||
for i := range vals {
|
||||
vals[i] = endpoint
|
||||
}
|
||||
return l.objs.EndpointMap.Put(&vni, vals)
|
||||
}
|
||||
|
||||
// Close detaches the XDP program from the device (if it was attached) and
// then closes the loaded eBPF programs and maps. Both steps are always
// attempted; any errors are combined into the returned error.
func (l *linuxFIB) Close() error {
	var errs []error
	if l.link != nil {
		// Detach first so the program stops running before its maps go away.
		errs = append(errs, l.link.Close())
	}
	if l.objs != nil {
		errs = append(errs, l.objs.Close())
	}
	// errors.Join drops nil entries and returns nil when none remain.
	return errors.Join(errs...)
}
|
||||
18
net/udprelay/xdp/xdp_notlinux.go
Normal file
18
net/udprelay/xdp/xdp_notlinux.go
Normal file
@ -0,0 +1,18 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !linux
|
||||
|
||||
package xdp
|
||||
|
||||
import "net/netip"
|
||||
|
||||
type noopFIB struct{}
|
||||
|
||||
func (noopFIB) Delete(vni uint32) error { return nil }
|
||||
func (noopFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error { return nil }
|
||||
func (noopFIB) Close(vni uint32, participants [2]netip.AddrPort) error { return nil }
|
||||
|
||||
func NewFIB(config FIBConfig, opts ...FIBOption) (FIB, error) {
|
||||
return noopFIB{}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user