diff --git a/net/udprelay/server.go b/net/udprelay/server.go
index 26b27bb7f..48de1dfc1 100644
--- a/net/udprelay/server.go
+++ b/net/udprelay/server.go
@@ -25,6 +25,7 @@ import (
 	"golang.org/x/crypto/blake2s"
 	"golang.org/x/net/ipv6"
 	"tailscale.com/disco"
+	"tailscale.com/envknob"
 	"tailscale.com/net/batching"
 	"tailscale.com/net/netaddr"
 	"tailscale.com/net/netcheck"
@@ -34,6 +35,7 @@ import (
 	"tailscale.com/net/stun"
 	"tailscale.com/net/udprelay/endpoint"
 	"tailscale.com/net/udprelay/status"
+	"tailscale.com/net/udprelay/xdp"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tstime"
 	"tailscale.com/types/key"
@@ -75,6 +77,7 @@ type Server struct {
 	wg         sync.WaitGroup
 	closeCh    chan struct{}
 	netChecker *netcheck.Client
+	fib        xdp.FIB
 
 	mu         sync.Mutex           // guards the following fields
 	macSecrets [][blake2s.Size]byte // [0] is most recent, max 2 elements
@@ -140,7 +143,7 @@ func blakeMACFromBindMsg(blakeKey [blake2s.Size]byte, src netip.AddrPort, msg di
 	return out, nil
 }
 
-func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
+func (e *serverEndpoint) handleDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
 	if senderIndex != 0 && senderIndex != 1 {
 		return nil, netip.AddrPort{}
 	}
@@ -218,6 +221,12 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
 		e.boundAddrPorts[senderIndex] = from
 		e.lastSeen[senderIndex] = time.Now()    // record last seen as bound time
 		e.inProgressGeneration[senderIndex] = 0 // reset to zero, which indicates there is no in-progress handshake
+		if fib != nil && e.boundAddrPorts[0].IsValid() && e.boundAddrPorts[1].IsValid() {
+			err = fib.Upsert(e.vni, e.boundAddrPorts)
+			if err != nil {
+				logf("error upserting fib: %v", err)
+			}
+		}
 		return nil, netip.AddrPort{}
 	}
 }
@@ -229,7 +238,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex
 	}
 }
 
-func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
+func (e *serverEndpoint) handleSealedDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) {
 	senderRaw, isDiscoMsg := disco.Source(b)
 	if !isDiscoMsg {
 		// Not a Disco message
@@ -260,7 +269,7 @@
 		return nil, netip.AddrPort{}
 	}
 
-	return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets)
+	return e.handleDiscoControlMsg(logf, fib, from, senderIndex, discoMsg, serverDisco, macSecrets)
 }
 
 func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now time.Time) (write []byte, to netip.AddrPort) {
@@ -323,6 +332,17 @@ func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (s *Serv
 		byVNI:   make(map[uint32]*serverEndpoint),
 	}
 	s.discoPublic = s.disco.Public()
+	xdpDev := envknob.String("TS_PEER_RELAY_XDP_DEVICE")
+	if xdpDev != "" {
+		s.fib, err = xdp.NewFIB(&xdp.FIBConfig{
+			DstPort:    port,
+			DeviceName: xdpDev,
+		})
+	}
+
+	if err != nil {
+		return nil, err
+	}
 
 	// TODO(creachadair): Find a way to plumb this in during initialization.
 	// As-written, messages published here will not be seen by other components
@@ -547,11 +567,11 @@ func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
 func (s *Server) bindSockets(desiredPort uint16) error {
 	// maxSocketsPerAF is a conservative starting point, but is somewhat
 	// arbitrary.
-	maxSocketsPerAF := min(16, runtime.NumCPU())
+	maxSocketsPerAF := min(128, runtime.NumCPU())
 	listenConfig := &net.ListenConfig{
 		Control: listenControl,
 	}
-	for _, network := range []string{"udp4", "udp6"} {
+	for _, network := range []string{"udp4"} { //, "udp6"} {
 	SocketsLoop:
 		for i := range maxSocketsPerAF {
 			if i > 0 {
@@ -626,6 +646,9 @@ func (s *Server) bindSocketTo(listenConfig *net.ListenConfig, network string, po
 // Close closes the server.
 func (s *Server) Close() error {
 	s.closeOnce.Do(func() {
+		if s.fib != nil {
+			s.fib.Close()
+		}
 		for _, uc4 := range s.uc4 {
 			uc4.Close()
 		}
@@ -662,6 +685,15 @@ func (s *Server) endpointGCLoop() {
 			if v.isExpired(now, s.bindLifetime, s.steadyStateLifetime) {
 				delete(s.byDisco, k)
 				delete(s.byVNI, v.vni)
+				// TODO: isExpired only considers userspace counters/liveliness
+				// TODO: this is a syscall per VNI to delete while holding s.mu,
+				// consider batch delete
+				if s.fib != nil {
+					err := s.fib.Delete(v.vni)
+					if err != nil {
+						s.logf("failed to delete fib entry: %v", err)
+					}
+				}
 			}
 		}
 	}
@@ -708,7 +740,7 @@ func (s *Server) handlePacket(from netip.AddrPort, b []byte) (write []byte, to n
 		}
 		msg := b[packet.GeneveFixedHeaderLength:]
 		s.maybeRotateMACSecretLocked(now)
-		return e.handleSealedDiscoControlMsg(from, msg, s.discoPublic, s.macSecrets)
+		return e.handleSealedDiscoControlMsg(s.logf, s.fib, from, msg, s.discoPublic, s.macSecrets)
 	}
 	return e.handleDataPacket(from, b, now)
 }
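Before the generated artifacts below, a minimal sketch of how the server changes above are expected to drive the FIB, using only the surface this patch exports from net/udprelay/xdp (NewFIB, Upsert, Delete, Close). The device name, port, VNI, and participant addresses are illustrative stand-ins, not defaults; in the server the device comes from TS_PEER_RELAY_XDP_DEVICE and the destination port from NewServer's port argument.

	package main

	import (
		"log"
		"net/netip"

		"tailscale.com/net/udprelay/xdp"
	)

	func main() {
		// Illustrative values; the relay server reads the device from
		// TS_PEER_RELAY_XDP_DEVICE and uses its own UDP port.
		fib, err := xdp.NewFIB(&xdp.FIBConfig{
			DeviceName: "eth0",
			DstPort:    7867,
		})
		if err != nil {
			log.Fatalf("NewFIB: %v", err)
		}
		defer fib.Close()

		// Once both participants of a VNI are bound, program the kernel fast path.
		participants := [2]netip.AddrPort{
			netip.MustParseAddrPort("192.0.2.1:41641"),
			netip.MustParseAddrPort("198.51.100.2:41641"),
		}
		if err := fib.Upsert(1, participants); err != nil {
			log.Printf("fib.Upsert: %v", err)
		}

		// When the endpoint expires, drop the kernel entry as well.
		if err := fib.Delete(1); err != nil {
			log.Printf("fib.Delete: %v", err)
		}
	}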
diff --git a/net/udprelay/xdp/bpf_bpfeb.go b/net/udprelay/xdp/bpf_bpfeb.go
new file mode 100644
index 000000000..dce7dd177
--- /dev/null
+++ b/net/udprelay/xdp/bpf_bpfeb.go
@@ -0,0 +1,131 @@
+// Code generated by bpf2go; DO NOT EDIT.
+//go:build mips || mips64 || ppc64 || s390x
+
+package xdp
+
+import (
+	"bytes"
+	_ "embed"
+	"fmt"
+	"io"
+
+	"github.com/cilium/ebpf"
+)
+
+type bpfConfig struct{ DstPort uint16 }
+
+type bpfEndpoint struct {
+	ParticipantAddrs  [2][4]uint32
+	ParticipantPorts  [2]uint16
+	ParticipantIsIpv6 [2]uint8
+	_                 [2]byte
+}
+
+// loadBpf returns the embedded CollectionSpec for bpf.
+func loadBpf() (*ebpf.CollectionSpec, error) {
+	reader := bytes.NewReader(_BpfBytes)
+	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
+	if err != nil {
+		return nil, fmt.Errorf("can't load bpf: %w", err)
+	}
+
+	return spec, err
+}
+
+// loadBpfObjects loads bpf and converts it into a struct.
+//
+// The following types are suitable as obj argument:
+//
+//	*bpfObjects
+//	*bpfPrograms
+//	*bpfMaps
+//
+// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
+func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
+	spec, err := loadBpf()
+	if err != nil {
+		return err
+	}
+
+	return spec.LoadAndAssign(obj, opts)
+}
+
+// bpfSpecs contains maps and programs before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfSpecs struct {
+	bpfProgramSpecs
+	bpfMapSpecs
+}
+
+// bpfSpecs contains programs before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfProgramSpecs struct {
+	XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"`
+}
+
+// bpfMapSpecs contains maps before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfMapSpecs struct {
+	ConfigMap   *ebpf.MapSpec `ebpf:"config_map"`
+	EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"`
+}
+
+// bpfObjects contains all objects after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfObjects struct {
+	bpfPrograms
+	bpfMaps
+}
+
+func (o *bpfObjects) Close() error {
+	return _BpfClose(
+		&o.bpfPrograms,
+		&o.bpfMaps,
+	)
+}
+
+// bpfMaps contains all maps after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfMaps struct {
+	ConfigMap   *ebpf.Map `ebpf:"config_map"`
+	EndpointMap *ebpf.Map `ebpf:"endpoint_map"`
+}
+
+func (m *bpfMaps) Close() error {
+	return _BpfClose(
+		m.ConfigMap,
+		m.EndpointMap,
+	)
+}
+
+// bpfPrograms contains all programs after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfPrograms struct {
+	XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"`
+}
+
+func (p *bpfPrograms) Close() error {
+	return _BpfClose(
+		p.XdpProgFunc,
+	)
+}
+
+func _BpfClose(closers ...io.Closer) error {
+	for _, closer := range closers {
+		if err := closer.Close(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Do not access this directly.
+//
+//go:embed bpf_bpfeb.o
+var _BpfBytes []byte
diff --git a/net/udprelay/xdp/bpf_bpfeb.o b/net/udprelay/xdp/bpf_bpfeb.o
new file mode 100644
index 000000000..9b035f983
Binary files /dev/null and b/net/udprelay/xdp/bpf_bpfeb.o differ
diff --git a/net/udprelay/xdp/bpf_bpfel.go b/net/udprelay/xdp/bpf_bpfel.go
new file mode 100644
index 000000000..b6599db04
--- /dev/null
+++ b/net/udprelay/xdp/bpf_bpfel.go
@@ -0,0 +1,131 @@
+// Code generated by bpf2go; DO NOT EDIT.
+//go:build 386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64
+
+package xdp
+
+import (
+	"bytes"
+	_ "embed"
+	"fmt"
+	"io"
+
+	"github.com/cilium/ebpf"
+)
+
+type bpfConfig struct{ DstPort uint16 }
+
+type bpfEndpoint struct {
+	ParticipantAddrs  [2][4]uint32
+	ParticipantPorts  [2]uint16
+	ParticipantIsIpv6 [2]uint8
+	_                 [2]byte
+}
+
+// loadBpf returns the embedded CollectionSpec for bpf.
+func loadBpf() (*ebpf.CollectionSpec, error) {
+	reader := bytes.NewReader(_BpfBytes)
+	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
+	if err != nil {
+		return nil, fmt.Errorf("can't load bpf: %w", err)
+	}
+
+	return spec, err
+}
+
+// loadBpfObjects loads bpf and converts it into a struct.
+//
+// The following types are suitable as obj argument:
+//
+//	*bpfObjects
+//	*bpfPrograms
+//	*bpfMaps
+//
+// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
+func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
+	spec, err := loadBpf()
+	if err != nil {
+		return err
+	}
+
+	return spec.LoadAndAssign(obj, opts)
+}
+
+// bpfSpecs contains maps and programs before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfSpecs struct {
+	bpfProgramSpecs
+	bpfMapSpecs
+}
+
+// bpfSpecs contains programs before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfProgramSpecs struct {
+	XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"`
+}
+
+// bpfMapSpecs contains maps before they are loaded into the kernel.
+//
+// It can be passed ebpf.CollectionSpec.Assign.
+type bpfMapSpecs struct {
+	ConfigMap   *ebpf.MapSpec `ebpf:"config_map"`
+	EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"`
+}
+
+// bpfObjects contains all objects after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfObjects struct {
+	bpfPrograms
+	bpfMaps
+}
+
+func (o *bpfObjects) Close() error {
+	return _BpfClose(
+		&o.bpfPrograms,
+		&o.bpfMaps,
+	)
+}
+
+// bpfMaps contains all maps after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfMaps struct {
+	ConfigMap   *ebpf.Map `ebpf:"config_map"`
+	EndpointMap *ebpf.Map `ebpf:"endpoint_map"`
+}
+
+func (m *bpfMaps) Close() error {
+	return _BpfClose(
+		m.ConfigMap,
+		m.EndpointMap,
+	)
+}
+
+// bpfPrograms contains all programs after they have been loaded into the kernel.
+//
+// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
+type bpfPrograms struct {
+	XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"`
+}
+
+func (p *bpfPrograms) Close() error {
+	return _BpfClose(
+		p.XdpProgFunc,
+	)
+}
+
+func _BpfClose(closers ...io.Closer) error {
+	for _, closer := range closers {
+		if err := closer.Close(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Do not access this directly.
+//
+//go:embed bpf_bpfel.o
+var _BpfBytes []byte
diff --git a/net/udprelay/xdp/bpf_bpfel.o b/net/udprelay/xdp/bpf_bpfel.o
new file mode 100644
index 000000000..c72c4cf84
Binary files /dev/null and b/net/udprelay/xdp/bpf_bpfel.o differ
diff --git a/net/udprelay/xdp/xdp.c b/net/udprelay/xdp/xdp.c
new file mode 100644
index 000000000..386712975
--- /dev/null
+++ b/net/udprelay/xdp/xdp.c
@@ -0,0 +1,350 @@
+//go:build ignore
+
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[4] SEC("license") = "GPL";
+
+struct config {
+	__u16 dst_port;
+};
+struct config *unused_config __attribute__((unused)); // required by bpf2go -type
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct config));
+	__uint(max_entries, 1);
+} config_map SEC(".maps");
+
+struct endpoint {
+	__be32 participant_addrs[2][4];
+	__u16 participant_ports[2];
+	__u8 participant_is_ipv6[2];
+};
+struct endpoint *unused_endpoint __attribute__((unused)); // required by bpf2go -type
+
+#define MAX_GENEVE_VNI ((1 << 24) - 1)
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(key_size, sizeof(__u32)); // key is Geneve VNI
+	__uint(value_size, sizeof(struct endpoint));
+	__uint(max_entries, MAX_GENEVE_VNI);
+} endpoint_map SEC(".maps");
+
+#define MAX_UDP_LEN_IPV4 1480
+
+#define MAX_UDP_LEN_IPV6 1460
+
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1fff
+
+/*
+Geneve Header:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |Ver|  Opt Len  |O|C|    Rsvd.    |         Protocol Type         |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |        Virtual Network Identifier (VNI)       |    Reserved    |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |                                                               |
+ ~                    Variable-Length Options                    ~
+ |                                                               |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+struct geneve_header {
+	__u8 first;
+	__u8 second;
+	__be16 protocol;
+	__be32 vni;
+};
+
+static __always_inline __u16 csum_fold(__u32 csum) {
+	__u32 sum;
+	sum = (csum >> 16) + (csum & 0xffff); // maximum value 0x1fffe
+	sum += (sum >> 16);                   // maximum value 0xffff
+	return sum;
+}
+
+static __always_inline __u16 csum_fold_flip(__u32 csum) {
+	__u32 sum;
+	sum = (csum >> 16) + (csum & 0xffff); // maximum value 0x1fffe
+	sum += (sum >> 16);                   // maximum value 0xffff
+	return ~sum;
+}
+
+static __always_inline __u32 pseudo_sum_ipv6(struct ipv6hdr* ip6, __u16 udp_len) {
+	__u32 pseudo = 0; // TODO(jwhited): __u64 for intermediate checksum values to reduce number of ops
+	for (int i = 0; i < 8; i++) {
+		pseudo += ip6->saddr.in6_u.u6_addr16[i];
+		pseudo += ip6->daddr.in6_u.u6_addr16[i];
+	}
+	pseudo += bpf_htons(ip6->nexthdr);
+	pseudo += udp_len;
+	return pseudo;
+}
+
+static __always_inline __u32 pseudo_sum_ipv4(struct iphdr* ip, __u16 udp_len) {
+	__u32 pseudo = (__u16)ip->saddr;
+	pseudo += (__u16)(ip->saddr >> 16);
+	pseudo += (__u16)ip->daddr;
+	pseudo += (__u16)(ip->daddr >> 16);
+	pseudo += bpf_htons(ip->protocol);
+	pseudo += udp_len;
+	return pseudo;
+}
+
+// csum_const_size is an alternative to bpf_csum_diff. It's a verifier
+// workaround for when we are forced to use a constant max_size + bounds
+// checking. The alternative being passing a dynamic length to bpf_csum_diff
+// {from,to}_size arguments, which the verifier can't follow. For further info
+// see: https://github.com/iovisor/bcc/issues/2463#issuecomment-512503958
+static __always_inline __u16 csum_const_size(__u32 seed, void* from, void* data_end, int max_size) {
+	__u16 *buf = from;
+	for (int i = 0; i < max_size; i += 2) {
+		if ((void *)(buf + 1) > data_end) {
+			break;
+		}
+		seed += *buf;
+		buf++;
+	}
+	if ((void *)buf + 1 <= data_end) {
+		seed += *(__u8 *)buf;
+	}
+	return csum_fold_flip(seed);
+}
+SEC("xdp")
+int xdp_prog_func(struct xdp_md *ctx) {
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+
+	struct ethhdr *eth = data;
+	if ((void *)(eth + 1) > data_end) {
+		return XDP_PASS;
+	}
+
+	struct iphdr *ip;
+	struct ipv6hdr *ip6;
+	struct udphdr *udp;
+
+	int validate_udp_csum = 0;
+	int is_ipv6 = 0;
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		ip = (void *)(eth + 1);
+		if ((void *)(ip + 1) > data_end) {
+			return XDP_PASS;
+		}
+
+		if (ip->ihl != 5 ||
+		    ip->version != 4 ||
+		    ip->protocol != IPPROTO_UDP ||
+		    (ip->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0) {
+			return XDP_PASS;
+		}
+
+		// validate ipv4 header checksum
+		__u32 cs_unfolded = bpf_csum_diff(0, 0, (void *)ip, sizeof(*ip), 0);
+		__u16 cs = csum_fold_flip(cs_unfolded);
+		if (cs != 0) {
+			return XDP_PASS;
+		}
+
+		if (bpf_ntohs(ip->tot_len) != data_end - (void *)ip) {
+			return XDP_PASS;
+		}
+
+		udp = (void *)(ip + 1);
+		if ((void *)(udp + 1) > data_end) {
+			return XDP_PASS;
+		}
+
+		if (udp->check != 0) {
+			// https://datatracker.ietf.org/doc/html/rfc768#page-3
+			// If the computed checksum is zero, it is transmitted as all
+			// ones (the equivalent in one's complement arithmetic). An all
+			// zero transmitted checksum value means that the transmitter
+			// generated no checksum (for debugging or for higher level
+			// protocols that don't care).
+			validate_udp_csum = 1;
+		}
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		ip6 = (void *)(eth + 1);
+		if ((void *)(ip6 + 1) > data_end) {
+			return XDP_PASS;
+		}
+
+		if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP) {
+			return XDP_PASS;
+		}
+
+		udp = (void *)(ip6 + 1);
+		if ((void *)(udp + 1) > data_end) {
+			return XDP_PASS;
+		}
+
+		if (bpf_ntohs(ip6->payload_len) != data_end - (void *)udp) {
+			return XDP_PASS;
+		}
+
+		// https://datatracker.ietf.org/doc/html/rfc8200#page-28
+		// Unlike IPv4, the default behavior when UDP packets are
+		// originated by an IPv6 node is that the UDP checksum is not
+		// optional. That is, whenever originating a UDP packet, an IPv6
+		// node must compute a UDP checksum over the packet and the
+		// pseudo-header, and, if that computation yields a result of
+		// zero, it must be changed to hex FFFF for placement in the UDP
+		// header. IPv6 receivers must discard UDP packets containing a
+		// zero checksum and should log the error.
+		validate_udp_csum = 1;
+		is_ipv6 = 1;
+	} else {
+		return XDP_PASS;
+	}
+
+	__u32 config_key = 0;
+	struct config *c = bpf_map_lookup_elem(&config_map, &config_key);
+	if (!c) {
+		return XDP_PASS;
+	}
+
+	if (bpf_ntohs(udp->len) != data_end - (void *)udp) {
+		return XDP_PASS;
+	}
+
+	if (bpf_ntohs(udp->dest) != c->dst_port) {
+		return XDP_PASS;
+	}
+
+	if (validate_udp_csum) {
+		__u16 cs;
+		__u32 pseudo_sum;
+		if (is_ipv6) {
+			pseudo_sum = pseudo_sum_ipv6(ip6, udp->len);
+			cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV6);
+		} else {
+			pseudo_sum = pseudo_sum_ipv4(ip, udp->len);
+			cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV4);
+		}
+		if (cs != 0) {
+			return XDP_PASS;
+		}
+	}
+
+	struct geneve_header *geneve = (void *)(udp + 1);
+	if ((void *)(geneve + 1) > data_end) {
+		return XDP_PASS;
+	}
+
+	if (geneve->first != 0) {
+		// first 2 bits are version, must be zero
+		// next 6 bits are opt len, must be zero
+		return XDP_PASS;
+	}
+
+	if (geneve->second != 0) {
+		// first bit is control, must be zero
+		// next bit is critical (options), must be zero
+		// next 6 bits are reserved, must be zero
+		return XDP_PASS;
+	}
+
+	if ((bpf_ntohl(geneve->vni) & 0x000000FF) != 0) {
+		// last byte is reserved, must be zero
+		return XDP_PASS;
+	}
+
+	__u32 vni_key = bpf_ntohl(geneve->vni) >> 8;
+	struct endpoint *e = bpf_map_lookup_elem(&endpoint_map, &vni_key);
+	if (!e) {
+		return XDP_PASS;
+	}
+
+	int out_participant_index = -1; // -1 = unmatched
+	if (is_ipv6) {
+		// TODO
+	} else {
+		for (int i = 0; i < 2; i++) {
+			if (e->participant_is_ipv6[i] == 0 &&
+			    e->participant_addrs[i][3] == ip->saddr &&
+			    e->participant_ports[i] == bpf_ntohs(udp->source))
+			{
+				if (i == 0) {
+					out_participant_index = 1;
+				} else {
+					out_participant_index = 0;
+				}
+				break;
+			}
+		}
+	}
+	if (out_participant_index == -1) {
+		return XDP_PASS;
+	}
+
+	if (e->participant_is_ipv6[out_participant_index] == is_ipv6) {
+		// matching in/out address family
+		if (is_ipv6) {
+			// TODO: in ipv6, out ipv6
+		} else {
+			// TODO: in ipv4, out ipv4
+
+			// Update IPv4 header
+			__be32 p_addr = e->participant_addrs[out_participant_index][3];
+			__u32 ip_csum = ~(__u32)ip->check;
+			__u32 udp_csum = ~(__u32)udp->check;
+			ip_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, ip_csum);
+			udp_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, udp_csum);
+			ip->check = csum_fold_flip(ip_csum);
+			ip->saddr = ip->daddr;
+			ip->daddr = p_addr;
+
+			#define AF_INET 2
+			struct bpf_fib_lookup fib_params = {};
+			fib_params.family = AF_INET;
+			fib_params.tos = ip->tos;
+			fib_params.l4_protocol = ip->protocol;
+			fib_params.sport = 0;
+			fib_params.dport = 0;
+			fib_params.tot_len = bpf_ntohs(ip->tot_len);
+			fib_params.ipv4_src = ip->saddr;
+			fib_params.ipv4_dst = ip->daddr;
+			fib_params.ifindex = ctx->ingress_ifindex;
+
+			int rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), BPF_FIB_LOOKUP_DIRECT);
+			if (rc != BPF_FIB_LKUP_RET_SUCCESS) {
+				return XDP_ABORTED;
+			}
+
+			// Rewrite ethernet header source and destination address.
+			__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+			__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+
+			// Update UDP header
+			__u32 old_udp_port = (__u32)udp->source;
+			__u32 new_udp_port = (__u32)bpf_htons(e->participant_ports[out_participant_index]);
+			udp_csum = bpf_csum_diff(&old_udp_port, 4, &new_udp_port, 4, udp_csum);
+			udp->check = csum_fold_flip(udp_csum);
+			udp->source = udp->dest;
+			udp->dest = bpf_htons(e->participant_ports[out_participant_index]);
+			udp = (void *)(ip + 1);
+			if ((void *)(udp + 1) > data_end) {
+				return XDP_ABORTED;
+			}
+
+			return XDP_TX;
+		}
+	} else if (e->participant_is_ipv6[out_participant_index] == 0) {
+		// TODO: in ipv4, out ipv6
+	} else {
+		// TODO: in ipv6, out ipv4
+	}
+
+	return XDP_PASS;
+}
\ No newline at end of file
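Following the XDP program, a small userspace Go model of its Geneve fixed-header checks, which may help cross-check the BPF logic (in particular the reserved-byte test and the VNI extraction) from an ordinary Go test. The function name and the 8-byte fixed-header constant are illustrative assumptions, not part of this patch.

	package xdp

	import "encoding/binary"

	// geneveVNI mirrors the Geneve checks in xdp_prog_func: the Ver/Opt Len
	// byte, the O/C/Rsvd byte, and the trailing reserved byte must all be
	// zero, and the VNI is the upper 24 bits of the final 32-bit word of the
	// fixed header.
	func geneveVNI(pkt []byte) (vni uint32, ok bool) {
		const geneveFixedHeaderLength = 8
		if len(pkt) < geneveFixedHeaderLength {
			return 0, false
		}
		if pkt[0] != 0 || pkt[1] != 0 { // version/opt-len and O/C/reserved bits
			return 0, false
		}
		word := binary.BigEndian.Uint32(pkt[4:8])
		if word&0xFF != 0 { // low byte is reserved
			return 0, false
		}
		return word >> 8, true
	}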
diff --git a/net/udprelay/xdp/xdp.go b/net/udprelay/xdp/xdp.go
new file mode 100644
index 000000000..14c361879
--- /dev/null
+++ b/net/udprelay/xdp/xdp.go
@@ -0,0 +1,48 @@
+package xdp
+
+import "net/netip"
+
+// XDPAttachFlags represents how an XDP program will be attached to an
+// interface. This is a mirror of cilium/ebpf/link.AttachFlags, without
+// pulling that package in for non-Linux builds.
+type XDPAttachFlags uint32
+
+const (
+	// XDPDriverFallbackGenericMode attempts XDPDriverMode, and falls back to
+	// XDPGenericMode if the driver does not support XDP.
+	XDPDriverFallbackGenericMode = 0
+)
+
+const (
+	// XDPGenericMode (SKB) links XDP BPF program for drivers which do
+	// not yet support native XDP.
+	XDPGenericMode XDPAttachFlags = 1 << (iota + 1)
+	// XDPDriverMode links XDP BPF program into the driver’s receive path.
+	XDPDriverMode
+	// XDPOffloadMode offloads the entire XDP BPF program into hardware.
+	XDPOffloadMode
+)
+
+type FIBConfig struct {
+	DeviceName string
+	// TODO: DstPort is singular, but udp4 and udp6 can be independent ports if
+	// the user supplied a zero port value.
+	DstPort     uint16
+	AttachFlags XDPAttachFlags
+}
+
+func (f FIBConfig) validate() error { return nil }
+
+type FIBOption interface {
+	apply(*fibOptions)
+}
+
+type fibOptions struct {
+	noAttach bool
+}
+
+type FIB interface {
+	Delete(vni uint32) error
+	Upsert(vni uint32, participants [2]netip.AddrPort) error
+	Close() error
+}
diff --git a/net/udprelay/xdp/xdp_linux.go b/net/udprelay/xdp/xdp_linux.go
new file mode 100644
index 000000000..9a61942e9
--- /dev/null
+++ b/net/udprelay/xdp/xdp_linux.go
@@ -0,0 +1,109 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package xdp
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"net"
+	"net/netip"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/link"
+)
+
+//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type config -type endpoint bpf xdp.c -- -I ../../../derp/xdp/headers
+
+func NewFIB(config *FIBConfig, opts ...FIBOption) (FIB, error) {
+	o := &fibOptions{}
+	for _, opt := range opts {
+		opt.apply(o)
+	}
+	err := config.validate()
+	if err != nil {
+		return nil, fmt.Errorf("invalid config: %v", err)
+	}
+	objs := new(bpfObjects)
+	err = loadBpfObjects(objs, nil)
+	if err != nil {
+		var ve *ebpf.VerifierError
+		if errors.As(err, &ve) {
+			err = fmt.Errorf("verifier error: %+v", ve)
+		}
+		return nil, fmt.Errorf("error loading XDP program: %w", err)
+	}
+	f := &linuxFIB{
+		objs:    objs,
+		dstPort: config.DstPort,
+	}
+	var key uint32
+	xdpConfig := &bpfConfig{
+		DstPort: config.DstPort,
+	}
+	err = objs.ConfigMap.Put(key, xdpConfig)
+	if err != nil {
+		return nil, fmt.Errorf("error loading config in eBPF map: %w", err)
+	}
+	if o.noAttach {
+		return f, nil
+	}
+	iface, err := net.InterfaceByName(config.DeviceName)
+	if err != nil {
+		return nil, fmt.Errorf("error finding device: %w", err)
+	}
+	link, err := link.AttachXDP(link.XDPOptions{
+		Program:   objs.XdpProgFunc,
+		Interface: iface.Index,
+		Flags:     link.XDPAttachFlags(config.AttachFlags),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("error attaching XDP program to dev: %w", err)
+	}
+	f.link = link
+	return f, nil
+}
+
+type linuxFIB struct {
+	objs    *bpfObjects
+	dstPort uint16
+	link    link.Link
+}
+
+func (l *linuxFIB) Delete(vni uint32) error {
+	return l.objs.EndpointMap.Delete(&vni)
+}
+
+func (l *linuxFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error {
+	endpoint := bpfEndpoint{}
+	for i, participant := range participants {
+		as16 := participant.Addr().As16()
+		for j := 0; j < 4; j++ {
+			endpoint.ParticipantAddrs[i][j] = binary.NativeEndian.Uint32(as16[j*4:])
+		}
+		endpoint.ParticipantPorts[i] = participant.Port()
+		if participant.Addr().Is6() {
+			endpoint.ParticipantIsIpv6[i] = 1
+		}
+	}
+	numCPU, err := ebpf.PossibleCPU()
+	if err != nil {
+		return err
+	}
+	vals := make([]bpfEndpoint, numCPU)
+	for i := range vals {
+		vals[i] = endpoint
+	}
+	return l.objs.EndpointMap.Put(&vni, vals)
+}
+
+// Close detaches the XDP program (if attached) and releases the loaded
+// eBPF objects.
+func (l *linuxFIB) Close() error {
+	var err error
+	if l.link != nil {
+		err = l.link.Close()
+	}
+	return errors.Join(err, l.objs.Close())
+}
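A note on the Upsert encoding above, with a small self-contained sketch (the helper name is hypothetical, intended for tests): As16 puts an IPv4 participant's bytes in the last 4 bytes of the 16-byte form, so after the native-endian split the address lands in word index 3 — the same participant_addrs[i][3] that xdp.c compares against ip->saddr — and the whole value is replicated once per possible CPU because endpoint_map is a BPF_MAP_TYPE_PERCPU_HASH.

	package xdp

	import (
		"encoding/binary"
		"net/netip"
	)

	// encodeParticipantAddr mirrors how Upsert packs one participant address
	// into the [4]uint32 expected by the eBPF endpoint value.
	func encodeParticipantAddr(addr netip.Addr) (words [4]uint32) {
		as16 := addr.As16() // IPv4 occupies the last 4 bytes (v4-mapped form)
		for j := 0; j < 4; j++ {
			words[j] = binary.NativeEndian.Uint32(as16[j*4:])
		}
		return words
	}

Because the bytes are read and written with the same native endianness, words[3] holds the IPv4 address in wire byte order, which is what the __be32 comparison in the XDP program relies on.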
diff --git a/net/udprelay/xdp/xdp_notlinux.go b/net/udprelay/xdp/xdp_notlinux.go
new file mode 100644
index 000000000..ba1466e94
--- /dev/null
+++ b/net/udprelay/xdp/xdp_notlinux.go
@@ -0,0 +1,18 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build !linux
+
+package xdp
+
+import "net/netip"
+
+type noopFIB struct{}
+
+func (noopFIB) Delete(vni uint32) error                                 { return nil }
+func (noopFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error { return nil }
+func (noopFIB) Close() error                                            { return nil }
+
+func NewFIB(config *FIBConfig, opts ...FIBOption) (FIB, error) {
+	return noopFIB{}, nil
+}