tstest/natlab/vmtest: add macOS VM support using Tart base images

Add macOS VM support to the vmtest framework using Tart's pre-built
macOS images (ghcr.io/cirruslabs/macos-tahoe-base) instead of building
from IPSW. The Tart image has SIP disabled and SSH enabled.

At test time, the Tart base image's disk, NVRAM, and hardware identity
are APFS-cloned into a tailmac-compatible directory layout, and the VM
is booted headlessly via tailmac's Host.app (Virtualization.framework)
with its NIC connected to vnet's dgram socket.

New features:
- tailmac.go: ensureTartImage (auto-pull), cloneTartToTailmac (format
  conversion), startTailMacVM (launch + cleanup)
- NoAgent() node option for VMs without TTA installed
- LANPing() for ICMP reachability testing via TTA's /ping endpoint
- IsMacOS field on OSImage, with GOOS/GOARCH support
- Dgram socket listener in Start() for macOS VMs
- Fix ReadFromUnix error spam on dgram socket close in vnet

TestMacOSAndLinuxCanPing verifies a macOS Tart VM and a gokrazy Linux
VM can ping each other on the same vnet LAN.

Updates #13038

Change-Id: I5e73a27878abf009f780fdf11a346fc857711cff
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2026-04-28 12:10:45 -07:00 committed by Brad Fitzpatrick
parent ec7b11d986
commit b2d4ba04b6
5 changed files with 408 additions and 17 deletions

View File

@ -26,10 +26,14 @@ type OSImage struct {
SHA256 string // expected SHA256 hash of the image (of the final qcow2, after any decompression)
MemoryMB int // RAM for the VM
IsGokrazy bool // true for gokrazy images (different QEMU setup)
IsMacOS bool // true for macOS images (launched via tailmac, not QEMU)
}
// GOOS returns the Go OS name for this image.
func (img OSImage) GOOS() string {
if img.IsMacOS {
return "darwin"
}
if img.IsGokrazy {
return "linux"
}
@ -41,6 +45,9 @@ func (img OSImage) GOOS() string {
// GOARCH reports the Go architecture name (GOARCH) for this image:
// "arm64" for macOS images and "amd64" for all others.
func (img OSImage) GOARCH() string {
	switch {
	case img.IsMacOS:
		return "arm64"
	default:
		return "amd64"
	}
}
@ -73,6 +80,15 @@ var (
URL: "https://download.freebsd.org/releases/VM-IMAGES/15.0-RELEASE/amd64/Latest/FreeBSD-15.0-RELEASE-amd64-BASIC-CLOUDINIT-ufs.qcow2.xz",
MemoryMB: 1024,
}
// MacOS is a macOS VM launched via tailmac (Apple Virtualization.framework).
// Uses a Tart pre-built base image (ghcr.io/cirruslabs/macos-tahoe-base)
// which is automatically pulled on first use. Only runs on macOS arm64 hosts.
MacOS = OSImage{
Name: "macos",
IsMacOS: true,
MemoryMB: 4096,
}
)
// imageCacheDir returns the directory for cached VM images.

View File

@ -0,0 +1,236 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest
import (
"encoding/base64"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
)
const tartImage = "ghcr.io/cirruslabs/macos-tahoe-base:latest"
// tartConfig is the subset of Tart's config.json that cloneTartToTailmac
// reads in order to carry the VM's hardware identity over to tailmac.
// Both fields hold base64 (standard encoding) text; they are decoded before
// being written into the tailmac VM directory.
type tartConfig struct {
HardwareModel string `json:"hardwareModel"` // base64; decoded bytes are written to the "HardwareModel" file
ECID string `json:"ecid"` // base64; decoded bytes are written to the "MachineIdentifier" file
}
// ensureTartImage makes sure the Tart base image is present in Tart's local
// OCI cache and returns the cache directory that holds it (expected to
// contain disk.img, nvram.bin, and config.json; only disk.img is checked).
//
// If the "tart" binary is not on PATH the test is skipped. If the image is
// not cached yet, "tart pull" is run with its output sent to os.Stderr; any
// failure is fatal to the test.
//
// Because it reports through t.Skip and t.Fatalf, it should be called from
// the goroutine running the test.
func ensureTartImage(t testing.TB) string {
	if _, lookErr := exec.LookPath("tart"); lookErr != nil {
		t.Skip("tart not installed; skipping macOS VM test")
	}

	homeDir, err := os.UserHomeDir()
	if err != nil {
		t.Fatalf("UserHomeDir: %v", err)
	}

	// Location of the image inside Tart's OCI cache (populated by a
	// previous or upcoming "tart pull").
	ociDir := filepath.Join(homeDir, ".tart", "cache", "OCIs",
		"ghcr.io", "cirruslabs", "macos-tahoe-base", "latest")
	diskPath := filepath.Join(ociDir, "disk.img")
	cached := func() bool {
		_, statErr := os.Stat(diskPath)
		return statErr == nil
	}

	if cached() {
		return ociDir
	}

	t.Logf("pulling Tart image %s ...", tartImage)
	pull := exec.Command("tart", "pull", tartImage)
	pull.Stdout = os.Stderr
	pull.Stderr = os.Stderr
	if runErr := pull.Run(); runErr != nil {
		t.Fatalf("tart pull: %v", runErr)
	}

	// A successful pull should have populated the OCI cache.
	if !cached() {
		t.Fatalf("tart pull succeeded but image not found at %s", ociDir)
		return ""
	}
	return ociDir
}
// ensureTailMac records the tailmac bin/ directory (tstest/tailmac/bin under
// the module root) in e.tailmacDir and verifies that the pre-built Host.app
// executable exists inside it. It returns an error if the module root cannot
// be found or the executable cannot be stat'ed; e.tailmacDir is set in the
// latter case too.
func (e *Env) ensureTailMac() error {
	root, err := findModRoot()
	if err != nil {
		return err
	}

	binDir := filepath.Join(root, "tstest", "tailmac", "bin")
	e.tailmacDir = binDir

	hostBin := filepath.Join(binDir, "Host.app", "Contents", "MacOS", "Host")
	_, statErr := os.Stat(hostBin)
	if statErr == nil {
		return nil
	}
	return fmt.Errorf("tailmac Host.app not found at %s; run 'make all' in tstest/tailmac/", hostBin)
}
// cloneTartToTailmac creates a tailmac-compatible VM directory at cloneDir
// from the Tart base image in tartDir.
//
// It reads the hardware identity from tartDir/config.json and writes the
// decoded values to cloneDir/HardwareModel and cloneDir/MachineIdentifier,
// copies disk.img to Disk.img and nvram.bin to AuxiliaryStorage (trying an
// APFS copy-on-write clone first), and finally writes tailmac's config.json
// containing testID, mac, and dgramSock.
//
// It returns an error if the Tart config cannot be read or parsed, if either
// identity field is missing or not valid base64, or if any file cannot be
// written or copied.
func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error {
	if err := os.MkdirAll(cloneDir, 0755); err != nil {
		return err
	}

	// Read Tart's config.json for hardware identity.
	cfgData, err := os.ReadFile(filepath.Join(tartDir, "config.json"))
	if err != nil {
		return fmt.Errorf("reading tart config: %w", err)
	}
	var tc tartConfig
	if err := json.Unmarshal(cfgData, &tc); err != nil {
		return fmt.Errorf("parsing tart config: %w", err)
	}

	// Decode and write HardwareModel and MachineIdentifier (ECID).
	if err := writeTartIdentity(cloneDir, "HardwareModel", "hardwareModel", tc.HardwareModel); err != nil {
		return err
	}
	if err := writeTartIdentity(cloneDir, "MachineIdentifier", "ecid", tc.ECID); err != nil {
		return err
	}

	// Clone the disk image and the NVRAM.
	if err := cloneOrCopyFile("disk", filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")); err != nil {
		return err
	}
	if err := cloneOrCopyFile("nvram", filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")); err != nil {
		return err
	}

	// Write tailmac config.json.
	tmCfg := struct {
		VMid         string `json:"vmID"`
		ServerSocket string `json:"serverSocket"`
		MemorySize   uint64 `json:"memorySize"`
		Mac          string `json:"mac"`
	}{
		VMid:         testID,
		ServerSocket: dgramSock,
		// Fixed at 8 GiB.
		// NOTE(review): this is independent of OSImage.MemoryMB — confirm
		// that is intended.
		MemorySize: 8 * 1024 * 1024 * 1024,
		Mac:        mac,
	}
	tmData, err := json.MarshalIndent(tmCfg, "", " ")
	if err != nil {
		return fmt.Errorf("encoding tailmac config: %w", err)
	}
	return os.WriteFile(filepath.Join(cloneDir, "config.json"), tmData, 0644)
}

// writeTartIdentity base64-decodes encoded (the value of the named field in
// Tart's config.json) and writes the raw bytes to dir/name. An empty value is
// rejected, since it would otherwise yield an empty identity file.
func writeTartIdentity(dir, name, field, encoded string) error {
	if encoded == "" {
		return fmt.Errorf("tart config has no %s", field)
	}
	raw, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return fmt.Errorf("decoding %s: %w", field, err)
	}
	return os.WriteFile(filepath.Join(dir, name), raw, 0644)
}

// cloneOrCopyFile copies src to dst, first attempting an APFS copy-on-write
// clone ("cp -c", nearly instant) and falling back to a regular "cp" if that
// fails. what names the file in the returned error, which reports the output
// of both attempts.
func cloneOrCopyFile(what, src, dst string) error {
	cloneOut, cloneErr := exec.Command("cp", "-c", src, dst).CombinedOutput()
	if cloneErr == nil {
		return nil
	}
	// Fallback to regular copy.
	copyOut, copyErr := exec.Command("cp", src, dst).CombinedOutput()
	if copyErr == nil {
		return nil
	}
	return fmt.Errorf("copying %s: %v: %s (APFS clone: %v: %s)", what, copyErr, copyOut, cloneErr, cloneOut)
}
// startTailMacVM clones the Tart base image into a per-test tailmac VM
// directory and launches it via tailmac's Host.app in headless mode. The
// node's first NIC MAC and the vnet dgram socket path are passed to Host.app
// through the config.json written by cloneTartToTailmac.
//
// Host.app's stdout and stderr go to <tempDir>/<node>-tailmac.log. Test
// cleanups stop the process (interrupt, then kill after 15 seconds), remove
// the VM directory, and, if the test failed, log the tail of that log file.
//
// It returns an error if tailmac is not built, the clone fails, or the
// process cannot be started. A missing "tart" binary skips the test (see
// ensureTartImage).
func (e *Env) startTailMacVM(n *Node) error {
	tartDir := ensureTartImage(e.t)
	if err := e.ensureTailMac(); err != nil {
		return err
	}

	testID := fmt.Sprintf("vmtest-%s-%d", n.name, os.Getpid())

	// Host.app expects VM files under ~/.cache/tailscale/vmtest/macos/<id>/
	// (hardcoded in Config.swift's vmBundleURL).
	home, err := os.UserHomeDir()
	if err != nil {
		return fmt.Errorf("UserHomeDir: %w", err)
	}
	vmBase := filepath.Join(home, ".cache", "tailscale", "vmtest", "macos")
	if err := os.MkdirAll(vmBase, 0755); err != nil {
		return fmt.Errorf("creating VM base dir %s: %w", vmBase, err)
	}
	cloneDir := filepath.Join(vmBase, testID)

	// Register removal before cloning so that a clone which fails part-way
	// (for example after the disk image has already been copied) does not
	// leave a partial VM directory behind.
	e.t.Cleanup(func() { os.RemoveAll(cloneDir) })

	mac := n.vnetNode.NICMac(0)
	e.t.Logf("[%s] cloning Tart image -> %s (mac=%s)", n.name, testID, mac)
	if err := cloneTartToTailmac(tartDir, cloneDir, testID, mac.String(), e.dgramSockAddr); err != nil {
		return fmt.Errorf("cloning tart VM: %w", err)
	}

	// Launch Host.app in headless mode.
	// NOTE(review): an earlier comment here described starting with a
	// disconnected NIC and hot-swapping to the vnet dgram socket after
	// boot; nothing in this function performs a hot-swap — confirm against
	// Host.app's behavior.
	hostBin := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
	args := []string{
		"run", "--id", testID, "--headless",
	}

	logPath := filepath.Join(e.tempDir, n.name+"-tailmac.log")
	logFile, err := os.Create(logPath)
	if err != nil {
		return fmt.Errorf("creating log file: %w", err)
	}

	cmd := exec.Command(hostBin, args...)
	// NSUnbufferedIO forces Swift/Foundation to unbuffer stdout so we can
	// see output in the log file as it happens.
	cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	devNull, err := os.Open(os.DevNull)
	if err != nil {
		logFile.Close()
		return fmt.Errorf("open /dev/null: %w", err)
	}
	cmd.Stdin = devNull

	if err := cmd.Start(); err != nil {
		devNull.Close()
		logFile.Close()
		return fmt.Errorf("starting tailmac for %s: %w", n.name, err)
	}
	e.t.Logf("[%s] launched tailmac (pid %d), log: %s", n.name, cmd.Process.Pid, logPath)

	// Socket path removed on cleanup; presumably created by Host.app for
	// its side of the dgram connection — verify against Host.app.
	clientSock := fmt.Sprintf("/tmp/qemu-dgram-%s.sock", testID)

	// Cleanups run last-registered-first, so the process is stopped before
	// the VM directory registered above is removed.
	e.t.Cleanup(func() {
		// Ask the process to exit, and kill it if it has not done so
		// within 15 seconds.
		cmd.Process.Signal(os.Interrupt)
		done := make(chan error, 1)
		go func() { done <- cmd.Wait() }()
		select {
		case <-done:
		case <-time.After(15 * time.Second):
			cmd.Process.Kill()
			<-done
		}
		devNull.Close()
		logFile.Close()
		os.Remove(clientSock)

		// On failure, surface the end of the Host.app log in the test
		// output.
		if e.t.Failed() {
			if data, err := os.ReadFile(logPath); err == nil {
				lines := strings.Split(string(data), "\n")
				start := 0
				if len(lines) > 50 {
					start = len(lines) - 50
				}
				e.t.Logf("=== last 50 lines of %s tailmac log ===", n.name)
				for _, line := range lines[start:] {
					e.t.Logf("[%s] %s", n.name, line)
				}
			}
		}
	})
	return nil
}

View File

@ -29,6 +29,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
@ -63,8 +64,9 @@ type Env struct {
nodes []*Node
tempDir string
sockAddr string // shared Unix socket path for all QEMU netdevs
binDir string // directory for compiled binaries
sockAddr string // shared Unix socket path for all QEMU netdevs
dgramSockAddr string // Unix dgram socket path for macOS VMs (tailmac)
binDir string // directory for compiled binaries
// testVersion is the resolved Tailscale release version to use (empty if
// building from source). When non-empty, tailscale and tailscaled binaries
@ -75,6 +77,9 @@ type Env struct {
gokrazyBase string // path to gokrazy base qcow2 image
gokrazyKernel string // path to gokrazy kernel
// tailmac-specific paths (macOS VMs)
tailmacDir string // path to tailmac bin/ directory containing Host.app
qemuProcs []*exec.Cmd // launched QEMU processes
sameTailnetUser bool // all nodes register as the same Tailnet user
@ -300,6 +305,7 @@ type Node struct {
vnetNode *vnet.Node // primary vnet node (set during Start)
agent *vnet.NodeAgentClient
joinTailnet bool
noAgent bool // true to skip TTA agent setup (e.g. macOS VMs without TTA)
advertiseRoutes string
snatSubnetRoutes *bool // nil means default (true)
webServerPort int
@ -329,6 +335,8 @@ func (e *Env) AddNode(name string, opts ...any) *Node {
case nodeOptNoTailscale:
n.joinTailnet = false
vnetOpts = append(vnetOpts, vnet.DontJoinTailnet)
case nodeOptNoAgent:
n.noAgent = true
case nodeOptAdvertiseRoutes:
n.advertiseRoutes = string(o)
case nodeOptSNATSubnetRoutes:
@ -357,6 +365,7 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr {
type nodeOptOS OSImage
type nodeOptNoTailscale struct{}
type nodeOptNoAgent struct{}
type nodeOptAdvertiseRoutes string
type nodeOptSNATSubnetRoutes bool
type nodeOptWebServer int
@ -367,6 +376,11 @@ func OS(img OSImage) nodeOptOS { return nodeOptOS(img) }
// DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up.
func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} }
// NoAgent returns a NodeOption that marks the node as having no TTA test
// agent. Start skips agent setup for such nodes, so operations that depend on
// the agent (Status, ExecOnNode, etc.) won't work on them. Useful for VMs
// that just need to boot and respond to ICMP.
func NoAgent() nodeOptNoAgent {
	var opt nodeOptNoAgent
	return opt
}
// AdvertiseRoutes returns a NodeOption that configures the node to advertise
// the given routes (comma-separated CIDRs) when joining the tailnet.
func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes {
@ -411,12 +425,27 @@ func (e *Env) Start() {
t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion)
}
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy
// images). Gokrazy has binaries built-in, so doesn't need compilation.
// Check if any macOS nodes are present; if so, verify prerequisites.
hasMacOS := false
for _, n := range e.nodes {
if n.os.IsMacOS {
hasMacOS = true
break
}
}
if hasMacOS {
if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
t.Skip("macOS VM tests require macOS arm64 host")
}
}
// Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy,
// non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use
// compiled binaries (no TTA agent).
type platform struct{ goos, goarch string }
needPlatform := set.Set[platform]{}
for _, n := range e.nodes {
if !n.os.IsGokrazy {
if !n.os.IsGokrazy && !n.os.IsMacOS {
needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()})
}
}
@ -438,7 +467,9 @@ func (e *Env) Start() {
continue
}
didOS.Add(n.os.Name)
if n.os.IsGokrazy {
if n.os.IsMacOS {
imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image")
} else if n.os.IsGokrazy {
imageSteps["gokrazy"] = e.AddStep("Build gokrazy image")
} else {
imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name))
@ -446,12 +477,18 @@ func (e *Env) Start() {
}
vnetStep := e.AddStep("Create virtual network")
qemuSteps := map[string]*Step{}
vmSteps := map[string]*Step{}
agentSteps := map[string]*Step{}
tsUpSteps := map[string]*Step{}
for _, n := range e.nodes {
qemuSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
if n.os.IsMacOS {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name))
} else {
vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name))
}
if !n.noAgent {
agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name))
}
if n.joinTailnet {
tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name))
}
@ -485,7 +522,15 @@ func (e *Env) Start() {
continue
}
didOS.Add(n.os.Name)
if n.os.IsGokrazy {
if n.os.IsMacOS {
step := imageSteps[n.os.Name]
eg.Go(func() error {
step.Begin()
ensureTartImage(t)
step.End(nil)
return nil
})
} else if n.os.IsGokrazy {
step := imageSteps["gokrazy"]
eg.Go(func() error {
step.Begin()
@ -591,7 +636,7 @@ func (e *Env) Start() {
// not via the cloud-init HTTP VIP, because network-config must be available
// during init-local before systemd-networkd-wait-online blocks.
// Start Unix socket listener.
// Start Unix stream socket listener (for QEMU VMs).
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock")
srv, err := net.Listen("unix", e.sockAddr)
if err != nil {
@ -609,18 +654,45 @@ func (e *Env) Start() {
}
}()
// Launch QEMU processes.
// Start Unix dgram socket listener (for macOS VMs via tailmac).
// Use /tmp/ instead of the test temp dir because Unix socket paths
// are limited to 104 bytes on macOS, and test temp dir paths are long.
if hasMacOS {
e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid())
t.Cleanup(func() { os.Remove(e.dgramSockAddr) })
dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr)
if err != nil {
t.Fatalf("resolve dgram addr: %v", err)
}
uc, err := net.ListenUnixgram("unixgram", dgramAddr)
if err != nil {
t.Fatalf("listen unixgram: %v", err)
}
t.Cleanup(func() { uc.Close() })
go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM)
}
// Launch VM processes.
for _, n := range e.nodes {
step := qemuSteps[n.name]
step := vmSteps[n.name]
step.Begin()
if err := e.startQEMU(n); err != nil {
t.Fatalf("startQEMU(%s): %v", n.name, err)
if n.os.IsMacOS {
if err := e.startTailMacVM(n); err != nil {
t.Fatalf("startTailMacVM(%s): %v", n.name, err)
}
} else {
if err := e.startQEMU(n); err != nil {
t.Fatalf("startQEMU(%s): %v", n.name, err)
}
}
step.End(nil)
}
// Set up agent clients and wait for all agents to connect.
for _, n := range e.nodes {
if n.noAgent {
continue
}
n.agent = e.server.NodeAgentClient(n.vnetNode)
n.vnetNode.SetClient(n.agent)
}
@ -628,6 +700,9 @@ func (e *Env) Start() {
// Wait for agents, then bring up tailscale.
var agentEg errgroup.Group
for _, n := range e.nodes {
if n.noAgent {
continue
}
agentEg.Go(func() error {
aStep := agentSteps[n.name]
aStep.Begin()
@ -1123,6 +1198,51 @@ func (e *Env) HTTPGet(from *Node, targetURL string) string {
return ""
}
// Agent returns the node's TTA agent client. The client is assigned while
// Env.Start sets up agent clients, which it does only for nodes added
// without the NoAgent option; for NoAgent nodes the result is nil.
func (n *Node) Agent() *vnet.NodeAgentClient {
return n.agent
}
// LANPing pings targetIP from the given node by calling the /ping endpoint
// of that node's TTA agent, and returns once a request answers with HTTP 200.
//
// Each attempt has a 5 second timeout and failed attempts are retried every
// 2 seconds for up to 2 minutes, which is enough for a macOS VM to boot and
// acquire a DHCP lease. Every tenth failure is logged. The test is failed
// fatally if from has no agent (e.g. it was added with NoAgent) or if no
// attempt succeeds before the deadline.
func (e *Env) LANPing(from *Node, targetIP netip.Addr) {
	if from.agent == nil {
		e.t.Fatalf("LANPing: node %s has no agent (NoAgent set?)", from.name)
	}
	e.t.Logf("LANPing: %s -> %s", from.name, targetIP)

	// pingOnce performs a single /ping request. It reports whether the
	// agent answered with HTTP 200 and, if not, a description of the
	// failure for logging.
	pingOnce := func() (ok bool, failure string) {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		// The request context also governs reading the response body, so
		// keep it alive until the body has been consumed; cancelling
		// right after Do could truncate the body used in diagnostics.
		defer cancel()

		reqURL := fmt.Sprintf("http://unused/ping?host=%s", targetIP)
		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
		if err != nil {
			e.t.Fatalf("LANPing: %v", err)
		}
		res, err := from.agent.HTTPClient.Do(req)
		if err != nil {
			return false, err.Error()
		}
		defer res.Body.Close()
		// Best effort: the body is only used in the failure log line.
		body, _ := io.ReadAll(res.Body)
		if res.StatusCode == 200 {
			return true, ""
		}
		return false, fmt.Sprintf("status %d, body: %s", res.StatusCode, body)
	}

	deadline := time.Now().Add(2 * time.Minute)
	for attempt := 0; time.Now().Before(deadline); attempt++ {
		ok, failure := pingOnce()
		if ok {
			e.t.Logf("LANPing: %s -> %s succeeded on attempt %d", from.name, targetIP, attempt+1)
			return
		}
		if attempt%10 == 0 {
			e.t.Logf("LANPing attempt %d: %s", attempt+1, failure)
		}
		time.Sleep(2 * time.Second)
	}
	e.t.Fatalf("LANPing: %s -> %s timed out after 2 minutes", from.name, targetIP)
}
// SendTaildropFile sends a file via Taildrop from one node to another.
// The to node must be on the tailnet. It fatals on error.
func (e *Env) SendTaildropFile(from, to *Node, name string, content []byte) {

View File

@ -16,6 +16,26 @@ import (
"tailscale.com/tstest/natlab/vnet"
)
// TestMacOSAndLinuxCanPing puts a gokrazy Linux VM and a macOS VM on the same
// LAN, neither joined to a tailnet, and checks that the Linux node can ping
// the macOS node's LAN IP.
func TestMacOSAndLinuxCanPing(t *testing.T) {
	env := vmtest.New(t)
	lan := env.AddNetwork("192.168.1.1/24")

	// The Linux node keeps its TTA agent; it is the source of the ping.
	linuxNode := env.AddNode("linux", lan,
		vmtest.OS(vmtest.Gokrazy),
		vmtest.DontJoinTailnet(),
	)

	// The macOS node runs without an agent and only has to answer ICMP.
	macNode := env.AddNode("macos", lan,
		vmtest.OS(vmtest.MacOS),
		vmtest.DontJoinTailnet(),
		vmtest.NoAgent(),
	)

	env.Start()

	// LANPing retries until the macOS VM has booted and acquired a DHCP
	// lease.
	macIP := macNode.LanIP(lan)
	env.LANPing(linuxNode, macIP)
}
func TestSubnetRouter(t *testing.T) {
testSubnetRouterForOS(t, vmtest.Ubuntu2404)
}

View File

@ -1077,8 +1077,7 @@ func (s *Server) ServeUnixConn(uc *net.UnixConn, proto Protocol) {
n, addr, err := uc.ReadFromUnix(buf)
raddr = addr
if err != nil {
if s.shutdownCtx.Err() != nil {
// Return without logging.
if s.shutdownCtx.Err() != nil || errors.Is(err, net.ErrClosed) {
return
}
s.logf("ReadFromUnix: %#v", err)