From c3f9d1c22ea5be021a1e0170bcdf218695c67c0c Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Mon, 8 Dec 2025 14:51:13 -0800 Subject: [PATCH] net/udprelay: XDP PoC do not merge Updates tailscale/corp#34849 Signed-off-by: Jordan Whited --- net/udprelay/server.go | 44 +++- net/udprelay/xdp/bpf_bpfeb.go | 131 ++++++++++++ net/udprelay/xdp/bpf_bpfeb.o | Bin 0 -> 17144 bytes net/udprelay/xdp/bpf_bpfel.go | 131 ++++++++++++ net/udprelay/xdp/bpf_bpfel.o | Bin 0 -> 17248 bytes net/udprelay/xdp/xdp.c | 350 +++++++++++++++++++++++++++++++ net/udprelay/xdp/xdp.go | 48 +++++ net/udprelay/xdp/xdp_linux.go | 103 +++++++++ net/udprelay/xdp/xdp_notlinux.go | 18 ++ 9 files changed, 819 insertions(+), 6 deletions(-) create mode 100644 net/udprelay/xdp/bpf_bpfeb.go create mode 100644 net/udprelay/xdp/bpf_bpfeb.o create mode 100644 net/udprelay/xdp/bpf_bpfel.go create mode 100644 net/udprelay/xdp/bpf_bpfel.o create mode 100644 net/udprelay/xdp/xdp.c create mode 100644 net/udprelay/xdp/xdp.go create mode 100644 net/udprelay/xdp/xdp_linux.go create mode 100644 net/udprelay/xdp/xdp_notlinux.go diff --git a/net/udprelay/server.go b/net/udprelay/server.go index 26b27bb7f..48de1dfc1 100644 --- a/net/udprelay/server.go +++ b/net/udprelay/server.go @@ -25,6 +25,7 @@ import ( "golang.org/x/crypto/blake2s" "golang.org/x/net/ipv6" "tailscale.com/disco" + "tailscale.com/envknob" "tailscale.com/net/batching" "tailscale.com/net/netaddr" "tailscale.com/net/netcheck" @@ -34,6 +35,7 @@ import ( "tailscale.com/net/stun" "tailscale.com/net/udprelay/endpoint" "tailscale.com/net/udprelay/status" + "tailscale.com/net/udprelay/xdp" "tailscale.com/tailcfg" "tailscale.com/tstime" "tailscale.com/types/key" @@ -75,6 +77,7 @@ type Server struct { wg sync.WaitGroup closeCh chan struct{} netChecker *netcheck.Client + fib xdp.FIB mu sync.Mutex // guards the following fields macSecrets [][blake2s.Size]byte // [0] is most recent, max 2 elements @@ -140,7 +143,7 @@ func blakeMACFromBindMsg(blakeKey 
[blake2s.Size]byte, src netip.AddrPort, msg di return out, nil } -func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { +func (e *serverEndpoint) handleDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { if senderIndex != 0 && senderIndex != 1 { return nil, netip.AddrPort{} } @@ -218,6 +221,12 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex e.boundAddrPorts[senderIndex] = from e.lastSeen[senderIndex] = time.Now() // record last seen as bound time e.inProgressGeneration[senderIndex] = 0 // reset to zero, which indicates there is no in-progress handshake + if fib != nil && e.boundAddrPorts[0].IsValid() && e.boundAddrPorts[1].IsValid() { + err = fib.Upsert(e.vni, e.boundAddrPorts) + if err != nil { + logf("error upserting fib: %v", err) + } + } return nil, netip.AddrPort{} } } @@ -229,7 +238,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex } } -func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { +func (e *serverEndpoint) handleSealedDiscoControlMsg(logf logger.Logf, fib xdp.FIB, from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { senderRaw, isDiscoMsg := disco.Source(b) if !isDiscoMsg { // Not a Disco message @@ -260,7 +269,7 @@ func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []by return nil, netip.AddrPort{} } - return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets) + return e.handleDiscoControlMsg(logf, fib, from, senderIndex, 
discoMsg, serverDisco, macSecrets) } func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now time.Time) (write []byte, to netip.AddrPort) { @@ -323,6 +332,17 @@ func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (s *Serv byVNI: make(map[uint32]*serverEndpoint), } s.discoPublic = s.disco.Public() + xdpDev := envknob.String("TS_PEER_RELAY_XDP_DEVICE") + if xdpDev != "" { + s.fib, err = xdp.NewFIB(&xdp.FIBConfig{ + DstPort: port, + DeviceName: xdpDev, + }) + } + + if err != nil { + return nil, err + } // TODO(creachadair): Find a way to plumb this in during initialization. // As-written, messages published here will not be seen by other components @@ -547,11 +567,11 @@ func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) { func (s *Server) bindSockets(desiredPort uint16) error { // maxSocketsPerAF is a conservative starting point, but is somewhat // arbitrary. - maxSocketsPerAF := min(16, runtime.NumCPU()) + maxSocketsPerAF := min(128, runtime.NumCPU()) listenConfig := &net.ListenConfig{ Control: listenControl, } - for _, network := range []string{"udp4", "udp6"} { + for _, network := range []string{"udp4"} { //, "udp6"} { SocketsLoop: for i := range maxSocketsPerAF { if i > 0 { @@ -626,6 +646,9 @@ func (s *Server) bindSocketTo(listenConfig *net.ListenConfig, network string, po // Close closes the server. 
func (s *Server) Close() error { s.closeOnce.Do(func() { + if s.fib != nil { + s.fib.Close() + } for _, uc4 := range s.uc4 { uc4.Close() } @@ -662,6 +685,15 @@ func (s *Server) endpointGCLoop() { if v.isExpired(now, s.bindLifetime, s.steadyStateLifetime) { delete(s.byDisco, k) delete(s.byVNI, v.vni) + // TODO: isExpired only considers userspace counters/liveliness + // TODO: this is a syscall per VNI to delete while holding s.mu, + // consider batch delete + if s.fib != nil { + err := s.fib.Delete(v.vni) + if err != nil { + s.logf("failed to delete fib entry: %v", err) + } + } } } } @@ -708,7 +740,7 @@ func (s *Server) handlePacket(from netip.AddrPort, b []byte) (write []byte, to n } msg := b[packet.GeneveFixedHeaderLength:] s.maybeRotateMACSecretLocked(now) - return e.handleSealedDiscoControlMsg(from, msg, s.discoPublic, s.macSecrets) + return e.handleSealedDiscoControlMsg(s.logf, s.fib, from, msg, s.discoPublic, s.macSecrets) } return e.handleDataPacket(from, b, now) } diff --git a/net/udprelay/xdp/bpf_bpfeb.go b/net/udprelay/xdp/bpf_bpfeb.go new file mode 100644 index 000000000..dce7dd177 --- /dev/null +++ b/net/udprelay/xdp/bpf_bpfeb.go @@ -0,0 +1,131 @@ +// Code generated by bpf2go; DO NOT EDIT. +//go:build mips || mips64 || ppc64 || s390x + +package xdp + +import ( + "bytes" + _ "embed" + "fmt" + "io" + + "github.com/cilium/ebpf" +) + +type bpfConfig struct{ DstPort uint16 } + +type bpfEndpoint struct { + ParticipantAddrs [2][4]uint32 + ParticipantPorts [2]uint16 + ParticipantIsIpv6 [2]uint8 + _ [2]byte +} + +// loadBpf returns the embedded CollectionSpec for bpf. +func loadBpf() (*ebpf.CollectionSpec, error) { + reader := bytes.NewReader(_BpfBytes) + spec, err := ebpf.LoadCollectionSpecFromReader(reader) + if err != nil { + return nil, fmt.Errorf("can't load bpf: %w", err) + } + + return spec, err +} + +// loadBpfObjects loads bpf and converts it into a struct. 
+// +// The following types are suitable as obj argument: +// +// *bpfObjects +// *bpfPrograms +// *bpfMaps +// +// See ebpf.CollectionSpec.LoadAndAssign documentation for details. +func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { + spec, err := loadBpf() + if err != nil { + return err + } + + return spec.LoadAndAssign(obj, opts) +} + +// bpfSpecs contains maps and programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfSpecs struct { + bpfProgramSpecs + bpfMapSpecs +} + +// bpfSpecs contains programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfProgramSpecs struct { + XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` +} + +// bpfMapSpecs contains maps before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfMapSpecs struct { + ConfigMap *ebpf.MapSpec `ebpf:"config_map"` + EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"` +} + +// bpfObjects contains all objects after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfObjects struct { + bpfPrograms + bpfMaps +} + +func (o *bpfObjects) Close() error { + return _BpfClose( + &o.bpfPrograms, + &o.bpfMaps, + ) +} + +// bpfMaps contains all maps after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfMaps struct { + ConfigMap *ebpf.Map `ebpf:"config_map"` + EndpointMap *ebpf.Map `ebpf:"endpoint_map"` +} + +func (m *bpfMaps) Close() error { + return _BpfClose( + m.ConfigMap, + m.EndpointMap, + ) +} + +// bpfPrograms contains all programs after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
+type bpfPrograms struct { + XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` +} + +func (p *bpfPrograms) Close() error { + return _BpfClose( + p.XdpProgFunc, + ) +} + +func _BpfClose(closers ...io.Closer) error { + for _, closer := range closers { + if err := closer.Close(); err != nil { + return err + } + } + return nil +} + +// Do not access this directly. +// +//go:embed bpf_bpfeb.o +var _BpfBytes []byte diff --git a/net/udprelay/xdp/bpf_bpfeb.o b/net/udprelay/xdp/bpf_bpfeb.o new file mode 100644 index 0000000000000000000000000000000000000000..9b035f98380317db411cf6f858b61ae7c863b5a8 GIT binary patch literal 17144 zcmchdYiwM{b;oC4e2FsGvFy-_6OU|JA!U(XQ4~YRma>*DMQRmmBTIfHQ17mmOK~AS zR@|jTTDH4slR{|wfp$|@MGSNu*|plb0nyl%ni%Dxby~Ug2L?4z6>$I-4Nw&}QZ_MA zp*67mpP4hu*&|Xm5}+5lJM%l|%$~^w35P?feudP3Bku|78f3mOZ79jV zH>l7BlU3o&*Ho0UnP;tFK5x_Jv`KI18Bsr^+=*_r)}%A4ozi0k`c*9lCQYgi7t}pK zHNVetbJC=D^z2n{QSQVh!|JeBe~;WD)b~k}fPaAG#(cM0NBo$k-!If4{~OAkezA6z z(hG6=MXF7r<_Bzg`lVWhau<^HtJYmenN)qlCb!M3x8>)x6}WMCCX}kxtc-r=ciDXK zpiON*YPC~I!|@K*&u_H)Q<~+>GrwZ{;mV=^e@ZRPcUk+}dHnf%l^MyAJJlbPX1?3% zo3%+d_jIXql!HG^NjIJgX`>I_tgWAzRZOW*kOuyLL8;&RvGD%@{QrlfcmB8iQyu8v z^AI;-9@dc_{7ob`ivH|Gf6U6LkAug1Ax)*=ckAyE#%20}as3X-PrqE7H7U=JoX6~o zwGT-@znOlOqVd;iwGTUucZ|sV@H~c`f9F~6=JDpZdYTO92W^?#`)!)vXxH_TJ~cy< z{EaN{S7FK>*{V)b`dFHNRp@Y9O;P6Nf*PQ8oWE5ii1YiDrnKV$`UT%==LO^KB<|Qs zzea9a&Of1yhdRDomv1rpyt<9jj*OLes55z?DEy@^vZi)OzYiw#vy? 
zo(E@lI8OaRyySJ+Y|5*6Pp3W8Tz`L`I*sdEI~z|NN3J)7ddpN0=UIi)dOXug6MuSxT0<%7H9G_KzC`TR zYvK>;eUao`FP*Pt;uY#OGY|W3k>15!tn)Il+Xp9ZwBvw$mgKNEQ-u1;#K){(qNgjh zX<~N{cd-96pRnVFzm5OBewgv2f2Ll-Tx~;{|Qh zQQoJ~Zx~d!(fl=>`_@hcZao+~ocmTD?_mA>QKL|&(k6{}FwcC&_Rsa}73gt})nq?r zK5zRc`vbq2@g0BOjxTeojZ4{QZ9L0twtmSzXXO_#UR>egxf<42{kcbFU#cw;BYmIK zE1dp>(?w3-<@7SA?{K=n>D!!M;`B{UtCXI&S9MT|_N$bFjmN>S6CZ!6c9BxKUe!6=`sIjDa~x)b@A=m8^mHCzQ*+@*`Fbv-`&T^SSfvq z)5A)cC%XK4>zCrQcAvO?4sIjU%=((~mx~KL|1HJmY&;S2IOHadqP*F^Twf>HtXs5a z<^vp|`n~qT#L)kgQs_gJdU`_A|BO=T5&9bRQDSeO5Gw4SIZECBJf?|M2zJwyar<>X zLuBHIrWEB~rF4Eh{bE1G&H9}a`|DM?pWM24`>J`?6yJOS<>_*9u;;9O-eP&g$*~_Q zWS76m>W}j|@eA)ybGp|3R6n<0vE!+semgI>C{68x%{uYn=!G5)?E_!XIWlGN~b)n`zr38H{b^st$$OF+iNG`3hkM7%r>Uz z3N~F-mTk;Ry2b{vm25=6&DjSrJ^T9orgzpdZt6G1tt#$)-0vHa_xrsL{3z-rpVGV? z!F@Asx{l(eyvBW;^GT#roKGXAtw1(q+~4GU1?f360m?jo{|P%7&6v9ArJ6)EWQvdU zRP;SKkYtFf9e=>~v`*wa(tJ@pxI13pG0`qIMc9X*hVEq;kPM;@+Uw^10QuLsU$-Ow zeQt+}!S5aa!1d8HgJ0hr&|OA-G%c8`j(WVsoFXHWemd&uLO>qH;ocX8h-F=DR_tzB1U8;@xNWU!4thy`KV0BhWI zQF(qNrF?v^XJgI@=rwMRHk6M)3>(yEu1|3qRr&Ff3W5g?9(>@5;E{b_*&Q5uBDFgR zRB5J=Q^#{NK{5YiPMyq*PUUQNEHfSC#!Hj=Tv25w#)tFAg0W0N<;I5!6R6C<^iUxf z8&X4=QpWtCfTdt82T^|f*krC)4D!SI@uA$bnw$IG$h@i?JAZgr?OT!Hl#PBc(*>ST2D^oLDWm? 
zpx&XotfaeZbYlEiH_Uo=I_-VFxu$DX^ZOL3^TWEUUSF;>qVLvQyY)`(Dx@AbJE(hh zj?i#Q6MDxEeY7wfjFcwEi(R`9?F~}FzErm}cSY(|=L@UV>`e28o}KxTQN3Y@-maf| z%Ct&4x=3f5%}yADeW}#J1BVU-haXBw(=M_dp3EEzCWeRgCa;tGQo)z@=%+N&1AF#7 zvilHvi^dXupUo`lY%!P`AD$Q;!bI9(W{Xo}!BBp9xGS+mzOY5p(wi9W+MF+R zL)VQdfHC=<3hA9Y_11xI4Bp}BSZpQ9rS@(D&^8G=@r-tCs>_{$qyrK4| zb#AnnGwpu#!gzGxdP`&Aqu0hj&(4#%$zpzDoH{XpS;wM)a6C6%8X1}-p|>dH5PT1y zP$4rjI*}Q=o*OV|R|&5P8$=LmFtrK0d3eLx_<{|~iv@uu7@e3nK2->Eqq(uJP0Imj zi*|sDuvBRF$j2M9uC>MqGrqshY6rXzRdN^GC4kmo2GE z^twT|94-8bqwQkabUUi+=+y8^%b~Tkn8S8NH#WlprG_Uabr%KEyqORr_tSnd;r~7m zEc?s|?EG=(u&cn>krXD(SQbj2Q#&aig`tEB2yeVk4h@JV#VyV8<8&t&{ zR`O7zj%&sC9M2=%CZ-eS+OualxVG9EfI(olkK2x;eqo$CJM|!3z`M#F)p~PowSDDG z=rqU9;s%@>H{@vgX@|LsoSUQVKUvCW^Mwqa19+)p0n>?=(3>_XXYEo^f@Zu3`>$>@ zMe6J5V-%(RM@8M?X@j!a+q8b2eojnb8~gpIM-$tTQ3IG`!UgWFecAMXwD~drK!nr{R~1`V=uf8f9!f)8C2{>Piej57>wy3JtYLgqxk|h zyp5vQ7O;VY;%7Zz$U1GL*um4(SjBMuC{1~0tk^r88Ox8(wC>Ok>L4j8}s?Fnz4N^pp`(6;No?m zu|srDO}gg;YS2zj7fND1FZZDSG`TFoL*=g2o?y?uUBUhX2Y&T%DtKt$!QBrYk}DIj z4K3g~sBTTR2X`L|9y$ErgS#JjL`FkIhN=8$DL;fl~-4BbFSF4AfJ{oEnx_&JPB${>;Z9Q5KaI7C*Y_}TSY6H@E?$io}Xr0)%6A|)k z6ka*xiJ`eDfUbzh;dj&qCBGL&;tGp8{S%&^tyegY{RIaLJgAt{0lhFpj&1aV5!*-f zYvK}gD+Q){O+2<=#8ugm%%loN^V!^ZF~_gj$~|b}eI3tP%WuTR5zHxYs+b!J>=SYQ zbzJ#JnmJ$_Bxc4MBz$b64LaSZslnxzOt-ip_lTf+j~0un7Y_Fh8ThTj&!@%45<$&iv;p+)v-EMm(nPa|PzhD*O^zA>p;%i-HqI7Q!O&U#GWV=9(IR6n&hXE~fv;URn<3u)%>s8ByKk1tG& z@q`kd9^bG+gUmlgmh^i)#<#-|k2mru6&{U)(^hYoKKJqXBYi6T6zUG~cny=j%=V-w zt}vfa;R?nVy6AByc!7CVh2Nz1E;C?0&WMVJSP3)g3HV^DzYEnX~VOY@oTQ|StO`G`gTk7lrc&ru{U1$xdiXcW<8O+n z&?T&oXu;|Yw}9CnO*dibQ+?-IPmIMIzhG5i&qw(R^BD|?K-#?COazhW0XhQDH0nEykC-ogHlUG*62 zzbWo9^iA4h()0M5*CNzlzM6P^hS7f0UTg2-XH(i^#Mh<~j}cFsczj`wr%h$n6H|L< zEH~e!BCp~bd=rnSc>u1!8P`;0{h*4xiSwyx!DA|anK@9AcMz|e7Ck2Y73L>Z$REv(L#*6%>TS%y|#?0=>BcsvusaH_7#us11tnA&Im)!ScUy=(uf$7sI=KA`>7!tJ+ckI{Zh7c;jHKUn+sRFw8hOGI!-$2`~ zJifN4RkTX|Im`O*sOX#2pNhw%=kd1vP(?2xp0v)fo|yDi=Kn(I#QER4V0q1Y6@BMU zrP>CWzoeq?5+^-Idu@A}ODejE|2u2r@vI@n`P;_hS@Q})F~-+6!uD^e=ue2JnK8fb 
z-vKULZpUA6ey{<2hWX0~-NZcpHDs^B71lq4Q2tr)d5=kd(Xv|~ZB^F4j!=(y(RSJL z+E1!j6ziq!ipN-wZA;Ai5c+ZcuSqlGFEef z>hdHDDR~b!+s773z}Md*b#n5NyUrowL3`OlZ-YrS^WKP}6fefw`M%S~+NszTIv?tgokg_M-=5W#XMUE7Hqz3kz9 zYq&J%bDo@-E{*#*+O%Ab3&mQX}ItRKxh&;)1o{gy5v$l;E`Bg5a{? zS-};-bAqda7X&W~UTS1|CGnqs*2D#C!3n`h!70IM!3Du(!Lx!Zg69NR1uqC*6ui{P z?LdG0?Qy|ca6)iWa7u7ma6xcc@T}m9;5orn$Di47yH>p={7s_joy?8)?mD!mm*6`8 z8A*2ZXTk12D$QReFaN=y>Kz?DIcENN2Y*@YD&qe>t^e0+!`~6BUi=|QDRUI*%$P~J zD{B9)U5TLoUYquDx^0k|(`}z)=5#Bk{R39MeY;J!aoWe}_I{Rgx_uinr+tpMTKU$k z@Y*kb{RXNK@58Rro%?ML|M%K>(OKgz$~%_t&q-$_Qup3T;%&G;qP%%O_p5O4&m_AC zw>bciCUJY$)f(`l4wd+#q z7X{x{+TTFm9dMa%b~bB&{r2hMZMF8#Tl4k`Qfl8EbV&X74@}hd8^4vIXuNjrW%k=6 ek>B6n2AdaXw)K+UOBTWfq*MmqW6kB-@BBZpN7+~a literal 0 HcmV?d00001 diff --git a/net/udprelay/xdp/bpf_bpfel.go b/net/udprelay/xdp/bpf_bpfel.go new file mode 100644 index 000000000..b6599db04 --- /dev/null +++ b/net/udprelay/xdp/bpf_bpfel.go @@ -0,0 +1,131 @@ +// Code generated by bpf2go; DO NOT EDIT. +//go:build 386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 + +package xdp + +import ( + "bytes" + _ "embed" + "fmt" + "io" + + "github.com/cilium/ebpf" +) + +type bpfConfig struct{ DstPort uint16 } + +type bpfEndpoint struct { + ParticipantAddrs [2][4]uint32 + ParticipantPorts [2]uint16 + ParticipantIsIpv6 [2]uint8 + _ [2]byte +} + +// loadBpf returns the embedded CollectionSpec for bpf. +func loadBpf() (*ebpf.CollectionSpec, error) { + reader := bytes.NewReader(_BpfBytes) + spec, err := ebpf.LoadCollectionSpecFromReader(reader) + if err != nil { + return nil, fmt.Errorf("can't load bpf: %w", err) + } + + return spec, err +} + +// loadBpfObjects loads bpf and converts it into a struct. +// +// The following types are suitable as obj argument: +// +// *bpfObjects +// *bpfPrograms +// *bpfMaps +// +// See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
+func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { + spec, err := loadBpf() + if err != nil { + return err + } + + return spec.LoadAndAssign(obj, opts) +} + +// bpfSpecs contains maps and programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfSpecs struct { + bpfProgramSpecs + bpfMapSpecs +} + +// bpfSpecs contains programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfProgramSpecs struct { + XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` +} + +// bpfMapSpecs contains maps before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfMapSpecs struct { + ConfigMap *ebpf.MapSpec `ebpf:"config_map"` + EndpointMap *ebpf.MapSpec `ebpf:"endpoint_map"` +} + +// bpfObjects contains all objects after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfObjects struct { + bpfPrograms + bpfMaps +} + +func (o *bpfObjects) Close() error { + return _BpfClose( + &o.bpfPrograms, + &o.bpfMaps, + ) +} + +// bpfMaps contains all maps after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfMaps struct { + ConfigMap *ebpf.Map `ebpf:"config_map"` + EndpointMap *ebpf.Map `ebpf:"endpoint_map"` +} + +func (m *bpfMaps) Close() error { + return _BpfClose( + m.ConfigMap, + m.EndpointMap, + ) +} + +// bpfPrograms contains all programs after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
+type bpfPrograms struct { + XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` +} + +func (p *bpfPrograms) Close() error { + return _BpfClose( + p.XdpProgFunc, + ) +} + +func _BpfClose(closers ...io.Closer) error { + for _, closer := range closers { + if err := closer.Close(); err != nil { + return err + } + } + return nil +} + +// Do not access this directly. +// +//go:embed bpf_bpfel.o +var _BpfBytes []byte diff --git a/net/udprelay/xdp/bpf_bpfel.o b/net/udprelay/xdp/bpf_bpfel.o new file mode 100644 index 0000000000000000000000000000000000000000..c72c4cf84f785f2e0cb1a4fcbd56ce6b88383303 GIT binary patch literal 17248 zcmd6ue{7UTddJ6W8`}Y6n;*e;0=y7HY=WJ|{5YBfZUZp~DHk??6Yfsv^|D^$SN^eU z?>g8aNw>M<#8vsjKthmOU6({CROL=qn;hI-R9h;o=#@WQ6{)U{lU((rQYE^&y-2m% zNc8*6JhMCdV94q2pN@Dv?`NK8W}bOw=9%~1UH+hb|DI4uiL$9g{kJNREmdmftA+cB zzKy8G(7cNK5UyB#nJ$er=uAzVH+jVZ1wEx&zI#g-44z$N^7pEBe!N1dO57cILZj-a z2y32r*KkF-QfqPBa_Xio7(B*$p`WRC+zy^yp$j@fH&h+&g0ssszXGGadRM8A(BEi2 z*?0k@)KwnbRWcfBB5v}lFrR9k{ASMRpDSwN}$sUrPKtHu8s$u!WE+anyxh`k6`W>Mk=z0@x8NJoVOdgygZKZCbU%ETm zc3JbpS)Hj0w6_7bEpJ`Y1;N=SozpleUB$eRE0%|r=b3KG)l$tJ72j9i{RPJ1%)>TF zT2B43mfLwCJ7-bS5xSr{kq@%}J3{C6e7N@PguLagbNGvP?RXAq)b`LF*++H-=ay(e zVSI0EKKa6!_OYEOIj`}?FSLB(eXXwj@WtJ`ck9KXrg^0zJFgQgxAVEYH`+xU{RtRVjA_a%w7^J-YOEa2ZVZV1QNNZkR@w8s=!I`k;;2lrr zHD(#B;Eb8K;9R{aH`9Z1v#^49*O_uNjVRx!IXF+N7VGR`ovE6aIp1ya;7pso3Eo+& z^Fn{LYGD7nji#L2t%++?HSH!3&a~>A;GI=Q&)R!_y(vFqI z$KXp<%yXJ!FPZag!h|oS<2dPuKG*Ww=q>`L^*}2o-e~H%@oMe?@#-q-k&8|?p4RPk zRQy=)m*wgjO0k~b)0wKkxk@W=u*AfNj*^G8AU0^^v5>Lz=n^O*QM>qucC)(Opu__e z){os!x6wKIf8`oef3U=?_l}awTEEb~i9bh|T+-##KBYqYYc<>TDce7~=KET1m2J6hYM&CE*!ON(`Ftf*soXNAL^Il7UidKktfXg?0>?6Y=#2)VXT*fIKnu}|7_@*z{dH!$OioiXtz-eBTZ z?0w^xc(W8C%^{@$9pCVYE_e6mM{qRt6G1=Z2U^-R&+WL5?Djo)nCpmjhqlWsQg#Vx0GkZv|{DBXaGM=G>u_H%I2%uBk#ETn?mI4HRID?P3> zKd%?$G#awkoPX&CTE%Yr*a%34{h|?ZY~1*d_bHuA)Y9lhv;XYAo#5w@pw813DIU^z z;*D2~pD*fS9iPX=e!D_`N4sQ^UC(wO*nL(3chM8fRWpoyoNW1?@dK5U2WNU)$-KFOy5&;`8s9= z86C4~@71xLLAT6eleqTbLRwshKd#;MI8Bf2 
z@M#&vfup$iQCuD4dk=Xp-$#%S^L;1sQ+%IA{tn;gkbi&+sgkzgpXuA;d4vtsG{--} zMO)z~xKOr;qV0Po1nW-=Fpt(;?K7G$_&{HOV)Pc%Nn(|78(N7!mmot~8TD}m_vUm_ z>SI%x2LaqHo} z!AxYO^(Bh6{g2_s+K*dGx_ljOsGMx!q|3=}Tkdh2WW(d;-ukGh4y|2VXS2~;yar`7 zrrlulp-1Y0DZ$4&mf8Egj*dA+FeV835klX;2pzPi+j)d4B2k|z7|1B!f9~MH=f2?| z+V{89rreUNj<@a}~u2?3f|Db{)zdwPZVBq*rBAxbw?qHxRF`|Y>{K4S_Ixv_hmiqqi z<~B7vkPeOyB)U8@IG!0%)JJM4dED1}HE zc6zo!vcT#=+_JNWhLcHpJ9c=-Qr&(}COMF9YCpW!kNW$f%~su3$u}LO?$@$a4N@&T zgPuNb%?@v?_xkI)SCUa8neH~8)C&8e(SrvLAMiV#k4n`h((N9K9ru&n-QIe~$bC`& z>wCP{J>&=W>^ana7}lb*uxqoPMK7NAhX=ZoeO;JHGt78;xZm#zy1Sdg8$JB9;|fw6 zJz9Fn?xqbvsu{9oOaYY1?^Ma#xzpRUtr>$ixZVqobR#uC<3071hgBN%LrE`ONH#%| zF!i5A1BLqYYEwH(`-fbFiHxW384wlIJPl-$J!$QmOfuv5B?k1^?JRpO&WK!pQ1#(1 zcr@OVi1!w?{)U(6ODA-{U%oJ2-u9rSvF*#(#S^UK1vWAl6`N!)EjFhBfg885QLB4uKI; zNEd4^!2BnCwBgfI)e7V%0*mDwR$QcO^ZnNA2)*tgCCNH5l0D54R3)^NBWX z`&Mui%f6sVO~UG;vO=_QCyus@?$hq5renk1^DT#}s&oR|5oTCfbg@>3Cuw@oK_*@}2;%D;g@4npT%`9@f~KE#m`zlgpzS}D3F1L2hUWlY>R7;ZqJ_Qn>y_2E zxu`+2nTGziHtQ<+HS!x&rTs@$?cr&HvfktAlf zwVZO_KbF|s=B4ybg(K%3&HSxmgeF4S_RKD^@dAy#NegguTeBX5J|gC;5SaMl=#$gt zQ7+??D=xxD7o5ARNu0nG@l*kMZ4R~)Nloeay82lTzYwMt4J9^6VhGQ_= zK6*;&aEzur)}Lir(Kh4~?a~xz_d>npkEC*w21Lck#S~)!wXJwB4_ft0S z1+)_A5nQ}36dOe6)R28XpbpLCG@&NebHRfA(`2Ct50$&3d;C57cKQ1c9Qgf?sQ>)F zgYCNy%aw`Ph92-7l-DNRgYAd?Lmj(!w;wtrqah;0aL|_t2JpJmA5V=o@nf6`MH^ke zr2Av>jUGMaKexaAMbY#9+M%b9qE@;d+zJATN4tr(9<2vB)(tPltYWvCfHaUeF;690 zC+6)$ggg_4=MQ;eXf_JK6cIUGLrqX}t5Kj!G6a1R3?1LuW*YONhcq2~tU`~O< z=|q=ro{00Wo2bJB_B__W`_p>EhpUS_$wf^%G ztxNnQ8dyJPKEot_8T>FlMWWAKELVZU%x^-zcwF0$Kh&lig_CH zy5~b`#NkG8mKmn0y8R(_+TmvK81rg;n6o4rQWqS41e{~0_N&p}ICBj8E4wfrl+$>A zNW7&)g&kfD-tO>naD@3+&|eH;)Zqo-Bg~KBQ>)s(km_`}8r;L&26^~(jMw36@M*!9 znFpc2Bo|VX4%dUHna8NV>mfD6d;{EgGelFY$46WZo@4%;+M_QQ@sGw&+z768cs01z z;U~cKeX-TA25)EHf)Bm6;v>8x4p)PF1dlO~LVw}@QiUPt_F#femzmE&ULP%0)6D-2 zt~y+*W|;pC+}u^FW*x2qS7IDwAIXVpnLT^}y)IL#=yyXTC$0iF2@W$KfPBGsN>zk8 z09U}tdzoJYS0BcFGhd?ix=K{1kf(%vM95DIIl9pH+4gfneoe@42>Fzd&j|UPkXOP_ zc2k7>ME+RMz1J@R-9D;0w&9_#l4ajS`h} 
zxDtGsnZDLpcCAEBI~)SfGCvFXl3R#x=$FPvyZ~I!+zEbk1hJI)E%2(>vHu;e0&jQp z>!2TDrtg2NvKYU^E5K2Q7lAvOuao|FN>q=-tH3Gde7G_&<*CF~%75tBbXX6w)G4wFA4%+>f}XyG*c;cz9G{7mgp ze5H6g;xNVAtiu#vPcz&2dck3euQ}$&(ceQ?O4SteHgE}?H_f~q{P6Wsb;sca;8|uL z^2N7G)tti%!1M(d+4~yFKSMlmcmcSU`8@I(Y@T|Dmx7xdUH}d={}u8jkCrL2i`pk{ z21gvO1V9X#b54C~uM&D> z@6VC1xK^h2GTY~gsKd1Wk2p;0y_4Cl_mso5-m}bP-zwO5nt2_*kg9qX@sqg?T>lBi z$NUC(#U#eZ{6p}{>zH4MSAeIP{{ixb8<=m0SAb`k{}b}n6l{^xcpLGh>tnadR1fpl ziEo#wl*22*Bg`4dH_Tvu99{=L&HN$ctL~Jk3(S8<{7bB7=1;+EKf`)vUbjf8HJ@WW zGtN{4AZ z*D~An>^V&9xry1X-!Stse1lrQrCe=i{u;R9S&ZM|72pS@Qib>}($6)UbHWuZ#d9!y z<%}b$DE+0TQkBB^-1PTcPUnl8{+!D()kW!3E>B_0xaqIC980w*{VSJ8Mf?BD<#f)v z$^0rD&(EUYg*+}#=>L@4q4Up8zvl8N{OqQGEwQ%M|qtp7~^rJuW-3#oA>JS!eFiY=yw`*3WpLWYZkp1Q@LfE)B6P_GM9dDLFX$a zVyZLFONiR1-)wy1F#RTj9uX*!oa)o_H6>!IPtVtsi0L;E8wrqzsgDRQN|xz;nUdvB zO$Da+V@g(@)l^`5|D|N*IZXwo_gzX>KB=j|Q-Ws%&kELqK$&f?R?mBZJ;C%oP08v< z1k?MoEAJGX5}Xx0CO9W}T=1meDZw*>X9d&yvTHBBAG_=c4hxP5jtcG+Y~zsiUslM+ z1m^^g3!W4_C3r^gtYEv9tv$8QdM8(Tg2RF%f}?^v1*ZgO1&;~N2_6?bDR@fojNn

=|sh3Ppx22 zFdj*Y^do|!f;$DL1ZM@03C;-~7d$C=O7M)}S;6+m%8#FqFw6GbwHy}mh+uo(Tm4QU zPYKQn9uu4sJT7=r@RZ;g!Lx$xQETn3<@3|BCpauPA~-6zQ*cUfR`8hMoZxZ6lY*xN z&j_9sY#%7>c zp5UVOCg2x2s1dj`z6g(w(M)0g)#qT$E zzt;-(1cwDj1V;sT3Qh^m3LX=j6Fe?>Qt*`E8NsuHA6>n2m1@o4H;k%vDACut>+qgd zf(LwgNqY1%V*ACW{v~ta2ZpM(ukY1<{bL{e64^GyU$^GJsxA86v1-K+M>4Tv$VdBi z&ZemSyLN^Bw(Tb0@)c&zH*vms8<%swc`NI0G5OZbCg03?8|PcwxSaE?o0vJ@w8^af z-{tRHsLYR?Ma^92|8VgLzmHgbWcWqfQ~H5JL<72nW0cGG6SgX-J&cUy;#2aD<{ z+)>Dkk4`_o;uG>qj7WlkMM@t-}qK z$^P$feQT%Hm-Wv>AZ;RDTmNPHkibcDJZB-B&wn}gUzF>TIn=l9-`YjtYQFv>Yn_UG zOPTDS9(U&Jf0p~V@q+rNqe1q60A;27?e2thu>I5T94P%CHrw6I literal 0 HcmV?d00001 diff --git a/net/udprelay/xdp/xdp.c b/net/udprelay/xdp/xdp.c new file mode 100644 index 000000000..386712975 --- /dev/null +++ b/net/udprelay/xdp/xdp.c @@ -0,0 +1,350 @@ +//go:build ignore + +#include +#include +#include +#include +#include +#include +#include +#include + +char _license[4] SEC("license") = "GPL"; + +struct config { + __u16 dst_port; +}; +struct config *unused_config __attribute__((unused)); // required by bpf2go -type + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct config)); + __uint(max_entries, 1); +} config_map SEC(".maps"); + +struct endpoint { + __be32 participant_addrs[2][4]; + __u16 participant_ports[2]; + __u8 participant_is_ipv6[2]; +}; +struct endpoint *unused_endpoint __attribute__((unused)); // required by bpf2go -type + +#define MAX_GENEVE_VNI (1 << 24) - 1 + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(__u32)); // key is Geneve VNI + __uint(value_size, sizeof(struct endpoint)); + __uint(max_entries, MAX_GENEVE_VNI); +} endpoint_map SEC(".maps"); + +#define MAX_UDP_LEN_IPV4 1480 + +#define MAX_UDP_LEN_IPV6 1460 + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +/* +Geneve Header: + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Ver| Opt Len |O|C| Rsvd. 
| Protocol Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Virtual Network Identifier (VNI) | Reserved | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + ~ Variable-Length Options ~ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ +struct geneve_header { + __u8 first; + __u8 second; + __be16 protocol; + __be32 vni; +}; + +static __always_inline __u16 csum_fold(__u32 csum) { + __u32 sum; + sum = (csum >> 16) + (csum & 0xffff); // maximum value 0x1fffe + sum += (sum >> 16); // maximum value 0xffff + return sum; +} + +static __always_inline __u16 csum_fold_flip(__u32 csum) { + __u32 sum; + sum = (csum >> 16) + (csum & 0xffff); // maximum value 0x1fffe + sum += (sum >> 16); // maximum value 0xffff + return ~sum; +} + +static __always_inline __u32 pseudo_sum_ipv6(struct ipv6hdr* ip6, __u16 udp_len) { + __u32 pseudo = 0; // TODO(jwhited): __u64 for intermediate checksum values to reduce number of ops + for (int i = 0; i < 8; i ++) { + pseudo += ip6->saddr.in6_u.u6_addr16[i]; + pseudo += ip6->daddr.in6_u.u6_addr16[i]; + } + pseudo += bpf_htons(ip6->nexthdr); + pseudo += udp_len; + return pseudo; +} + +static __always_inline __u32 pseudo_sum_ipv4(struct iphdr* ip, __u16 udp_len) { + __u32 pseudo = (__u16)ip->saddr; + pseudo += (__u16)(ip->saddr >> 16); + pseudo += (__u16)ip->daddr; + pseudo += (__u16)(ip->daddr >> 16); + pseudo += bpf_htons(ip->protocol); + pseudo += udp_len; + return pseudo; +} + +// csum_const_size is an alternative to bpf_csum_diff. It's a verifier +// workaround for when we are forced to use a constant max_size + bounds +// checking. The alternative being passing a dynamic length to bpf_csum_diff +// {from,to}_size arguments, which the verifier can't follow. 
For further info +// see: https://github.com/iovisor/bcc/issues/2463#issuecomment-512503958 +static __always_inline __u16 csum_const_size(__u32 seed, void* from, void* data_end, int max_size) { + __u16 *buf = from; + for (int i = 0; i < max_size; i += 2) { + if ((void *)(buf + 1) > data_end) { + break; + } + seed += *buf; + buf++; + } + if ((void *)buf + 1 <= data_end) { + seed += *(__u8 *)buf; + } + return csum_fold_flip(seed); +} + +SEC("xdp") +int xdp_prog_func(struct xdp_md *ctx) { + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + + struct ethhdr *eth = data; + if ((void *)(eth + 1) > data_end) { + return XDP_PASS; + } + + struct iphdr *ip; + struct ipv6hdr *ip6; + struct udphdr *udp; + + int validate_udp_csum = 0; + int is_ipv6 = 0; + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + ip = (void *)(eth + 1); + if ((void *)(ip + 1) > data_end) { + return XDP_PASS; + } + + if (ip->ihl != 5 || + ip->version != 4 || + ip->protocol != IPPROTO_UDP || + (ip->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0) { + return XDP_PASS; + } + + // validate ipv4 header checksum + __u32 cs_unfolded = bpf_csum_diff(0, 0, (void *)ip, sizeof(*ip), 0); + __u16 cs = csum_fold_flip(cs_unfolded); + if (cs != 0) { + return XDP_PASS; + } + + if (bpf_ntohs(ip->tot_len) != data_end - (void *)ip) { + return XDP_PASS; + } + + udp = (void *)(ip + 1); + if ((void *)(udp + 1) > data_end) { + return XDP_PASS; + } + + if (udp->check != 0) { + // https://datatracker.ietf.org/doc/html/rfc768#page-3 + // If the computed checksum is zero, it is transmitted as all + // ones (the equivalent in one's complement arithmetic). An all + // zero transmitted checksum value means that the transmitter + // generated no checksum (for debugging or for higher level + // protocols that don't care). 
+ validate_udp_csum = 1; + } + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6 = (void *)(eth + 1); + if ((void *)(ip6 + 1) > data_end) { + return XDP_PASS; + } + + if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP) { + return XDP_PASS; + } + + udp = (void *)(ip6 + 1); + if ((void *)(udp + 1) > data_end) { + return XDP_PASS; + } + + if (bpf_ntohs(ip6->payload_len) != data_end - (void *)udp) { + return XDP_PASS; + } + + // https://datatracker.ietf.org/doc/html/rfc8200#page-28 + // Unlike IPv4, the default behavior when UDP packets are + // originated by an IPv6 node is that the UDP checksum is not + // optional. That is, whenever originating a UDP packet, an IPv6 + // node must compute a UDP checksum over the packet and the + // pseudo-header, and, if that computation yields a result of + // zero, it must be changed to hex FFFF for placement in the UDP + // header. IPv6 receivers must discard UDP packets containing a + // zero checksum and should log the error. + validate_udp_csum = 1; + is_ipv6 = 1; + } else { + return XDP_PASS; + } + + __u32 config_key = 0; + struct config *c = bpf_map_lookup_elem(&config_map, &config_key); + if (!c) { + return XDP_PASS; + } + + if (bpf_ntohs(udp->len) != data_end - (void *)udp) { + return XDP_PASS; + } + + if (bpf_ntohs(udp->dest) != c->dst_port) { + return XDP_PASS; + } + + if (validate_udp_csum) { + __u16 cs; + __u32 pseudo_sum; + if (is_ipv6) { + pseudo_sum = pseudo_sum_ipv6(ip6, udp->len); + cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV6); + } else { + pseudo_sum = pseudo_sum_ipv4(ip, udp->len); + cs = csum_const_size(pseudo_sum, udp, data_end, MAX_UDP_LEN_IPV4); + } + if (cs != 0) { + return XDP_PASS; + } + } + + struct geneve_header *geneve = (void *)(udp + 1); + if ((void *)(geneve +1) > data_end) { + return XDP_PASS; + } + + if (geneve->first != 0) { + // first 2 bits are version, must be zero + // next 6 bits are opt len, must be zero + return XDP_PASS; + } + + if (geneve->second != 0) { + 
// first bit is control, must be zero + // next bit is critical (options), must be zero + // next 6 bits are reserved, must be zero + return XDP_PASS; + } + + if ((geneve->vni & 0x000000FF) != 0) { + // last byte is reserved, must be zero + return XDP_PASS; + } + + __u32 vni_key = bpf_ntohl(geneve->vni) >> 8; + struct endpoint *e = bpf_map_lookup_elem(&endpoint_map, &vni_key); + if (!e) { + return XDP_PASS; + } + + int out_participant_index = -1; // -1 = unmatched + if (is_ipv6) { + // TODO + } else { + for (int i = 0; i < 2; i ++) { + if (e->participant_is_ipv6[i] == 0 && + e->participant_addrs[i][3] == ip->saddr && + e->participant_ports[i] == bpf_ntohs(udp->source)) + { + if (i == 0) { + out_participant_index = 1; + } else { + out_participant_index = 0; + } + break; + } + } + } + if (out_participant_index == -1) { + return XDP_PASS; + } + + if (e->participant_is_ipv6[out_participant_index] == is_ipv6) { + // matching in/out address family + if (is_ipv6) { + // TODO: in ipv6, out ipv6 + } else { + // TODO: in ipv4, out ipv4 + + // Update IPv4 header + __be32 p_addr = e->participant_addrs[out_participant_index][3]; + __u32 ip_csum = ~(__u32)ip->check; + __u32 udp_csum = ~(__u32)udp->check; + ip_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, ip_csum); + udp_csum = bpf_csum_diff(&ip->saddr, 4, &p_addr, 4, udp_csum); + ip->check = csum_fold_flip(ip_csum); + ip->saddr = ip->daddr; + ip->daddr = p_addr; + + #define AF_INET 2 + struct bpf_fib_lookup fib_params = {}; + fib_params.family = AF_INET; + fib_params.tos = ip->tos; + fib_params.l4_protocol = ip->protocol; + fib_params.sport = 0; + fib_params.dport = 0; + fib_params.tot_len = bpf_ntohs(ip->tot_len); + fib_params.ipv4_src = ip->saddr; + fib_params.ipv4_dst = ip->daddr; + fib_params.ifindex = ctx->ingress_ifindex; + + int rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), BPF_FIB_LOOKUP_DIRECT); + if (rc != BPF_FIB_LKUP_RET_SUCCESS) { + return XDP_ABORTED; + } + + // Rewrite ethernet header source and 
// XDPAttachFlags represents how the XDP program will be attached to an
// interface. It mirrors cilium/ebpf/link.XDPAttachFlags (the Linux
// implementation converts values of this type directly to that type) so that
// non-Linux builds do not have to import cilium/ebpf.
type XDPAttachFlags uint32

const (
	// XDPDriverFallbackGenericMode attempts XDPDriverMode, and falls back to
	// XDPGenericMode if the driver does not support XDP.
	//
	// It is deliberately left untyped so existing callers can keep using it
	// wherever a zero flag value is expected.
	XDPDriverFallbackGenericMode = 0
)

const (
	// XDPGenericMode (SKB) links XDP BPF program for drivers which do
	// not yet support native XDP.
	XDPGenericMode XDPAttachFlags = 1 << (iota + 1)
	// XDPDriverMode links XDP BPF program into the driver’s receive path.
	XDPDriverMode
	// XDPOffloadMode offloads the entire XDP BPF program into hardware.
	XDPOffloadMode
)

// knownAttachModes is the set of every attach-mode bit declared above.
const knownAttachModes = XDPGenericMode | XDPDriverMode | XDPOffloadMode

// configError is the error type returned by FIBConfig.validate. It is a plain
// string so that this file needs no imports beyond net/netip.
type configError string

// Error implements the error interface.
func (e configError) Error() string { return string(e) }

// FIBConfig holds the parameters needed to construct a FIB with NewFIB.
type FIBConfig struct {
	// DeviceName is the name of the network interface the XDP program is
	// attached to.
	DeviceName string
	// DstPort is the UDP destination port the XDP program matches on.
	//
	// TODO: DstPort is singular, but udp4 and udp6 can be independent ports if
	// the user supplied a zero port value.
	DstPort uint16
	// AttachFlags selects the XDP attach mode. The zero value is
	// XDPDriverFallbackGenericMode.
	AttachFlags XDPAttachFlags
}

// validate reports whether f can be used to construct a FIB.
//
// AttachFlags must be zero (XDPDriverFallbackGenericMode) or exactly one of
// the declared modes. Undefined bits and combinations of modes are rejected
// here so that the mistake is reported as a configuration error rather than
// as an opaque attach failure later on. DeviceName and DstPort are not
// checked: DeviceName is only needed when the program is actually attached.
func (f FIBConfig) validate() error {
	if f.AttachFlags&^knownAttachModes != 0 {
		return configError("AttachFlags contains unknown bits")
	}
	// x&(x-1) clears the lowest set bit; a non-zero result means more than
	// one bit (i.e. more than one mode) is set.
	if f.AttachFlags&(f.AttachFlags-1) != 0 {
		return configError("AttachFlags must select at most one XDP mode")
	}
	return nil
}

// FIBOption customises the behaviour of NewFIB. Its only method is
// unexported, so options can only be defined inside this package.
type FIBOption interface {
	apply(*fibOptions)
}

// fibOptions collects the settings applied by FIBOption values.
type fibOptions struct {
	// noAttach, when set, makes NewFIB skip attaching the XDP program to a
	// device.
	noAttach bool
}

// FIB is a forwarding table of relay endpoints keyed by Geneve VNI.
type FIB interface {
	// Delete removes the entry for vni.
	Delete(vni uint32) error
	// Upsert installs or replaces the entry for vni with the given pair of
	// participant addresses.
	Upsert(vni uint32, participants [2]netip.AddrPort) error
	// Close releases the resources held by the FIB.
	Close() error
}
+ DstPort uint16 + AttachFlags XDPAttachFlags +} + +func (f FIBConfig) validate() error { return nil } + +type FIBOption interface { + apply(*fibOptions) +} + +type fibOptions struct { + noAttach bool +} + +type FIB interface { + Delete(vni uint32) error + Upsert(vni uint32, participants [2]netip.AddrPort) error + Close() error +} diff --git a/net/udprelay/xdp/xdp_linux.go b/net/udprelay/xdp/xdp_linux.go new file mode 100644 index 000000000..9a61942e9 --- /dev/null +++ b/net/udprelay/xdp/xdp_linux.go @@ -0,0 +1,103 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package xdp + +import ( + "encoding/binary" + "errors" + "fmt" + "net" + "net/netip" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/link" +) + +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type config -type endpoint bpf xdp.c -- -I ../../../derp/xdp/headers + +func NewFIB(config *FIBConfig, opts ...FIBOption) (FIB, error) { + o := &fibOptions{} + for _, opt := range opts { + opt.apply(o) + } + err := config.validate() + if err != nil { + return nil, fmt.Errorf("invalid config: %v", err) + } + objs := new(bpfObjects) + err = loadBpfObjects(objs, nil) + if err != nil { + var ve *ebpf.VerifierError + if errors.As(err, &ve) { + err = fmt.Errorf("verifier error: %+v", ve) + } + return nil, fmt.Errorf("error loading XDP program: %w", err) + } + f := &linuxFIB{ + objs: objs, + dstPort: config.DstPort, + } + var key uint32 + xdpConfig := &bpfConfig{ + DstPort: config.DstPort, + } + err = objs.ConfigMap.Put(key, xdpConfig) + if err != nil { + return nil, fmt.Errorf("error loading config in eBPF map: %w", err) + } + if o.noAttach { + return f, nil + } + iface, err := net.InterfaceByName(config.DeviceName) + if err != nil { + return nil, fmt.Errorf("error finding device: %w", err) + } + link, err := link.AttachXDP(link.XDPOptions{ + Program: objs.XdpProgFunc, + Interface: iface.Index, + Flags: link.XDPAttachFlags(config.AttachFlags), + }) 
+ if err != nil { + return nil, fmt.Errorf("error attaching XDP program to dev: %w", err) + } + f.link = link + return f, nil +} + +type linuxFIB struct { + objs *bpfObjects + dstPort uint16 + link link.Link +} + +func (l *linuxFIB) Delete(vni uint32) error { + return l.objs.EndpointMap.Delete(&vni) +} + +func (l *linuxFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error { + endpoint := bpfEndpoint{} + for i, participant := range participants { + as16 := participant.Addr().As16() + for j := 0; j < 4; j++ { + endpoint.ParticipantAddrs[i][j] = binary.NativeEndian.Uint32(as16[j*4:]) + } + endpoint.ParticipantPorts[i] = participant.Port() + if participant.Addr().Is6() { + endpoint.ParticipantIsIpv6[i] = 1 + } + } + numCPU, err := ebpf.PossibleCPU() + if err != nil { + return err + } + vals := make([]bpfEndpoint, numCPU) + for i := range vals { + vals[i] = endpoint + } + return l.objs.EndpointMap.Put(&vni, vals) +} + +func (l *linuxFIB) Close() error { return nil } diff --git a/net/udprelay/xdp/xdp_notlinux.go b/net/udprelay/xdp/xdp_notlinux.go new file mode 100644 index 000000000..ba1466e94 --- /dev/null +++ b/net/udprelay/xdp/xdp_notlinux.go @@ -0,0 +1,18 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !linux + +package xdp + +import "net/netip" + +type noopFIB struct{} + +func (noopFIB) Delete(vni uint32) error { return nil } +func (noopFIB) Upsert(vni uint32, participants [2]netip.AddrPort) error { return nil } +func (noopFIB) Close(vni uint32, participants [2]netip.AddrPort) error { return nil } + +func NewFIB(config FIBConfig, opts ...FIBOption) (FIB, error) { + return noopFIB{} +}