diff --git a/tstest/natlab/vmtest/images.go b/tstest/natlab/vmtest/images.go index 49eba443f..bce5452a4 100644 --- a/tstest/natlab/vmtest/images.go +++ b/tstest/natlab/vmtest/images.go @@ -26,10 +26,14 @@ type OSImage struct { SHA256 string // expected SHA256 hash of the image (of the final qcow2, after any decompression) MemoryMB int // RAM for the VM IsGokrazy bool // true for gokrazy images (different QEMU setup) + IsMacOS bool // true for macOS images (launched via tailmac, not QEMU) } // GOOS returns the Go OS name for this image. func (img OSImage) GOOS() string { + if img.IsMacOS { + return "darwin" + } if img.IsGokrazy { return "linux" } @@ -41,6 +45,9 @@ func (img OSImage) GOOS() string { // GOARCH returns the Go architecture name for this image. func (img OSImage) GOARCH() string { + if img.IsMacOS { + return "arm64" + } return "amd64" } @@ -73,6 +80,15 @@ var ( URL: "https://download.freebsd.org/releases/VM-IMAGES/15.0-RELEASE/amd64/Latest/FreeBSD-15.0-RELEASE-amd64-BASIC-CLOUDINIT-ufs.qcow2.xz", MemoryMB: 1024, } + + // MacOS is a macOS VM launched via tailmac (Apple Virtualization.framework). + // Uses a Tart pre-built base image (ghcr.io/cirruslabs/macos-tahoe-base) + // which is automatically pulled on first use. Only runs on macOS arm64 hosts. + MacOS = OSImage{ + Name: "macos", + IsMacOS: true, + MemoryMB: 4096, + } ) // imageCacheDir returns the directory for cached VM images. diff --git a/tstest/natlab/vmtest/tailmac.go b/tstest/natlab/vmtest/tailmac.go new file mode 100644 index 000000000..44f5648ec --- /dev/null +++ b/tstest/natlab/vmtest/tailmac.go @@ -0,0 +1,236 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +const tartImage = "ghcr.io/cirruslabs/macos-tahoe-base:latest" + +// tartConfig is the subset of Tart's config.json we need. 
+type tartConfig struct {
+	HardwareModel string `json:"hardwareModel"` // base64-encoded
+	ECID          string `json:"ecid"`          // base64-encoded
+}
+
+// ensureTartImage checks that the Tart base image is available, pulling it
+// with "tart pull" if necessary. It returns the OCI cache directory for the
+// image, which is expected to contain disk.img, nvram.bin, and config.json
+// (only the presence of disk.img is verified here).
+//
+// The test is skipped if the tart binary is not on PATH, and failed if the
+// home directory cannot be determined, the pull fails, or disk.img is still
+// missing after a successful pull.
+//
+// Because it calls t.Skip and t.Fatalf, it must be called from the goroutine
+// running the test, not from a goroutine the test started.
+// NOTE(review): Env.Start invokes this from an errgroup goroutine; confirm
+// and move that call onto the test goroutine.
+func ensureTartImage(t testing.TB) string {
+	t.Helper()
+	if _, err := exec.LookPath("tart"); err != nil {
+		t.Skip("tart not installed; skipping macOS VM test")
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		t.Fatalf("UserHomeDir: %v", err)
+	}
+
+	// Location of the image in Tart's OCI cache (populated by "tart pull").
+	// This path must be kept in sync with the tartImage constant.
+	ociDir := filepath.Join(home, ".tart", "cache", "OCIs",
+		"ghcr.io", "cirruslabs", "macos-tahoe-base", "latest")
+	cached := func() bool {
+		_, err := os.Stat(filepath.Join(ociDir, "disk.img"))
+		return err == nil
+	}
+
+	// Reuse the image from a previous pull when present.
+	if cached() {
+		return ociDir
+	}
+
+	t.Logf("pulling Tart image %s ...", tartImage)
+	cmd := exec.Command("tart", "pull", tartImage)
+	// Send tart's progress output to stderr so it shows up in test output.
+	cmd.Stdout = os.Stderr
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatalf("tart pull: %v", err)
+	}
+
+	// After pull, the OCI cache should have it.
+	if !cached() {
+		t.Fatalf("tart pull succeeded but image not found at %s", ociDir)
+	}
+	return ociDir
+}
+
+// ensureTailMac locates the pre-built tailmac Host.app binary and records
+// the directory containing Host.app in e.tailmacDir. It returns an error if
+// findModRoot fails or if the Host binary cannot be stat'ed.
+func (e *Env) ensureTailMac() error {
+	modRoot, err := findModRoot()
+	if err != nil {
+		return err
+	}
+	e.tailmacDir = filepath.Join(modRoot, "tstest", "tailmac", "bin")
+	hostApp := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
+	if _, err := os.Stat(hostApp); err != nil {
+		// Keep the underlying error: a permission problem needs a different
+		// remedy than a missing build.
+		return fmt.Errorf("tailmac Host.app not found at %s; run 'make all' in tstest/tailmac/: %w", hostApp, err)
+	}
+	return nil
+}
+
+// cloneTartToTailmac creates a tailmac-compatible VM directory from a Tart
+// base image. It uses APFS CoW clones for the disk and NVRAM, and extracts
+// the hardware identity from Tart's config.json.
+func cloneTartToTailmac(tartDir, cloneDir, testID, mac, dgramSock string) error {
+	if err := os.MkdirAll(cloneDir, 0755); err != nil {
+		return err
+	}
+
+	// Read Tart's config.json for hardware identity.
+	cfgData, err := os.ReadFile(filepath.Join(tartDir, "config.json"))
+	if err != nil {
+		return fmt.Errorf("reading tart config: %w", err)
+	}
+	var tc tartConfig
+	if err := json.Unmarshal(cfgData, &tc); err != nil {
+		return fmt.Errorf("parsing tart config: %w", err)
+	}
+
+	// Decode and write HardwareModel.
+	hwModel, err := base64.StdEncoding.DecodeString(tc.HardwareModel)
+	if err != nil {
+		return fmt.Errorf("decoding hardwareModel: %w", err)
+	}
+	if err := os.WriteFile(filepath.Join(cloneDir, "HardwareModel"), hwModel, 0644); err != nil {
+		return err
+	}
+
+	// Decode and write MachineIdentifier (ECID).
+	ecid, err := base64.StdEncoding.DecodeString(tc.ECID)
+	if err != nil {
+		return fmt.Errorf("decoding ecid: %w", err)
+	}
+	if err := os.WriteFile(filepath.Join(cloneDir, "MachineIdentifier"), ecid, 0644); err != nil {
+		return err
+	}
+
+	// Disk image and NVRAM: APFS clone when possible (nearly instant,
+	// copy-on-write), regular copy otherwise.
+	if err := cloneOrCopyFile(filepath.Join(tartDir, "disk.img"), filepath.Join(cloneDir, "Disk.img")); err != nil {
+		return fmt.Errorf("copying disk: %w", err)
+	}
+	if err := cloneOrCopyFile(filepath.Join(tartDir, "nvram.bin"), filepath.Join(cloneDir, "AuxiliaryStorage")); err != nil {
+		return fmt.Errorf("copying nvram: %w", err)
+	}
+
+	// Write tailmac config.json.
+	tmCfg := struct {
+		VMid         string `json:"vmID"`
+		ServerSocket string `json:"serverSocket"`
+		MemorySize   uint64 `json:"memorySize"`
+		Mac          string `json:"mac"`
+	}{
+		VMid:         testID,
+		ServerSocket: dgramSock,
+		// NOTE(review): fixed at 8 GiB; not derived from OSImage.MemoryMB
+		// (4096 for the MacOS image). Confirm which value is intended.
+		MemorySize: 8 * 1024 * 1024 * 1024,
+		Mac:        mac,
+	}
+	tmData, err := json.MarshalIndent(tmCfg, "", " ")
+	if err != nil {
+		return fmt.Errorf("encoding tailmac config: %w", err)
+	}
+	return os.WriteFile(filepath.Join(cloneDir, "config.json"), tmData, 0644)
+}
+
+// cloneOrCopyFile copies src to dst, first with "cp -c" (clone) and, if that
+// fails, with a plain "cp". The returned error carries the error and
+// combined output of both attempts.
+func cloneOrCopyFile(src, dst string) error {
+	cloneOut, cloneErr := exec.Command("cp", "-c", src, dst).CombinedOutput()
+	if cloneErr == nil {
+		return nil
+	}
+	// Fallback to regular copy.
+	copyOut, copyErr := exec.Command("cp", src, dst).CombinedOutput()
+	if copyErr == nil {
+		return nil
+	}
+	return fmt.Errorf("%v: %s (APFS clone: %v: %s)", copyErr, copyOut, cloneErr, cloneOut)
+}
+
+// startTailMacVM clones a Tart base image and launches it via tailmac
+// Host.app in headless mode, connected to vnet's dgram socket.
+//
+// The clone directory, the Host process, its log file, and the client
+// socket path are all released via e.t.Cleanup. If the test has failed by
+// then, the last 50 lines of the Host log are copied to the test log.
+func (e *Env) startTailMacVM(n *Node) error {
+	tartDir := ensureTartImage(e.t)
+
+	if err := e.ensureTailMac(); err != nil {
+		return err
+	}
+
+	testID := fmt.Sprintf("vmtest-%s-%d", n.name, os.Getpid())
+
+	// Host.app expects VM files under ~/.cache/tailscale/vmtest/macos/<testID>/
+	// (hardcoded in Config.swift's vmBundleURL).
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return fmt.Errorf("UserHomeDir: %w", err)
+	}
+	vmBase := filepath.Join(home, ".cache", "tailscale", "vmtest", "macos")
+	if err := os.MkdirAll(vmBase, 0755); err != nil {
+		return fmt.Errorf("creating %s: %w", vmBase, err)
+	}
+	cloneDir := filepath.Join(vmBase, testID)
+	// Register removal before cloning so that a clone which fails partway
+	// (possibly after copying the disk image) does not leave files behind.
+	e.t.Cleanup(func() { os.RemoveAll(cloneDir) })
+
+	mac := n.vnetNode.NICMac(0)
+	e.t.Logf("[%s] cloning Tart image -> %s (mac=%s)", n.name, testID, mac)
+	if err := cloneTartToTailmac(tartDir, cloneDir, testID, mac.String(), e.dgramSockAddr); err != nil {
+		return fmt.Errorf("cloning tart VM: %w", err)
+	}
+
+	// Launch Host.app in headless mode with disconnected NIC,
+	// then hot-swap to the vnet dgram socket after boot.
+	hostBin := filepath.Join(e.tailmacDir, "Host.app", "Contents", "MacOS", "Host")
+	args := []string{
+		"run", "--id", testID, "--headless",
+	}
+
+	logPath := filepath.Join(e.tempDir, n.name+"-tailmac.log")
+	logFile, err := os.Create(logPath)
+	if err != nil {
+		return fmt.Errorf("creating log file: %w", err)
+	}
+
+	cmd := exec.Command(hostBin, args...)
+	// NSUnbufferedIO forces Swift/Foundation to unbuffer stdout so we can
+	// see output in the log file as it happens.
+	cmd.Env = append(os.Environ(), "NSUnbufferedIO=YES")
+	cmd.Stdout = logFile
+	cmd.Stderr = logFile
+	devNull, err := os.Open(os.DevNull)
+	if err != nil {
+		logFile.Close()
+		return fmt.Errorf("open /dev/null: %w", err)
+	}
+	cmd.Stdin = devNull
+
+	if err := cmd.Start(); err != nil {
+		devNull.Close()
+		logFile.Close()
+		return fmt.Errorf("starting tailmac for %s: %w", n.name, err)
+	}
+	e.t.Logf("[%s] launched tailmac (pid %d), log: %s", n.name, cmd.Process.Pid, logPath)
+
+	// NOTE(review): presumably the client-side socket Host.app creates for
+	// this VM; this function only removes it on cleanup. Confirm the name
+	// matches what Host.app uses.
+	clientSock := fmt.Sprintf("/tmp/qemu-dgram-%s.sock", testID)
+
+	e.t.Cleanup(func() {
+		// Ask Host.app to stop, then kill it if it has not exited within
+		// 15 seconds.
+		cmd.Process.Signal(os.Interrupt)
+		done := make(chan error, 1)
+		go func() { done <- cmd.Wait() }()
+		select {
+		case <-done:
+		case <-time.After(15 * time.Second):
+			cmd.Process.Kill()
+			<-done
+		}
+		devNull.Close()
+		logFile.Close()
+		os.Remove(clientSock)
+
+		// On failure, surface the tail of the Host log in the test output.
+		if e.t.Failed() {
+			if data, err := os.ReadFile(logPath); err == nil {
+				lines := strings.Split(string(data), "\n")
+				start := 0
+				if len(lines) > 50 {
+					start = len(lines) - 50
+				}
+				e.t.Logf("=== last 50 lines of %s tailmac log ===", n.name)
+				for _, line := range lines[start:] {
+					e.t.Logf("[%s] %s", n.name, line)
+				}
+			}
+		}
+	})
+
+	return nil
+}
diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go
index 58a344ac1..5690fbb75 100644
--- a/tstest/natlab/vmtest/vmtest.go
+++ b/tstest/natlab/vmtest/vmtest.go
@@ -29,6 +29,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"runtime"
 	"strconv"
 	"strings"
 	"sync"
@@ -63,8 +64,9 @@ type Env struct {
 	nodes []*Node
tempDir string - sockAddr string // shared Unix socket path for all QEMU netdevs - binDir string // directory for compiled binaries + sockAddr string // shared Unix socket path for all QEMU netdevs + dgramSockAddr string // Unix dgram socket path for macOS VMs (tailmac) + binDir string // directory for compiled binaries // testVersion is the resolved Tailscale release version to use (empty if // building from source). When non-empty, tailscale and tailscaled binaries @@ -75,6 +77,9 @@ type Env struct { gokrazyBase string // path to gokrazy base qcow2 image gokrazyKernel string // path to gokrazy kernel + // tailmac-specific paths (macOS VMs) + tailmacDir string // path to tailmac bin/ directory containing Host.app + qemuProcs []*exec.Cmd // launched QEMU processes sameTailnetUser bool // all nodes register as the same Tailnet user @@ -300,6 +305,7 @@ type Node struct { vnetNode *vnet.Node // primary vnet node (set during Start) agent *vnet.NodeAgentClient joinTailnet bool + noAgent bool // true to skip TTA agent setup (e.g. macOS VMs without TTA) advertiseRoutes string snatSubnetRoutes *bool // nil means default (true) webServerPort int @@ -329,6 +335,8 @@ func (e *Env) AddNode(name string, opts ...any) *Node { case nodeOptNoTailscale: n.joinTailnet = false vnetOpts = append(vnetOpts, vnet.DontJoinTailnet) + case nodeOptNoAgent: + n.noAgent = true case nodeOptAdvertiseRoutes: n.advertiseRoutes = string(o) case nodeOptSNATSubnetRoutes: @@ -357,6 +365,7 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr { type nodeOptOS OSImage type nodeOptNoTailscale struct{} +type nodeOptNoAgent struct{} type nodeOptAdvertiseRoutes string type nodeOptSNATSubnetRoutes bool type nodeOptWebServer int @@ -367,6 +376,11 @@ func OS(img OSImage) nodeOptOS { return nodeOptOS(img) } // DontJoinTailnet returns a NodeOption that prevents the node from running tailscale up. 
func DontJoinTailnet() nodeOptNoTailscale { return nodeOptNoTailscale{} } +// NoAgent returns a NodeOption that skips TTA agent setup. The node will not +// have a test agent, so agent-dependent operations (Status, ExecOnNode, etc.) +// won't work. Useful for VMs that just need to boot and respond to ICMP. +func NoAgent() nodeOptNoAgent { return nodeOptNoAgent{} } + // AdvertiseRoutes returns a NodeOption that configures the node to advertise // the given routes (comma-separated CIDRs) when joining the tailnet. func AdvertiseRoutes(routes string) nodeOptAdvertiseRoutes { @@ -411,12 +425,27 @@ func (e *Env) Start() { t.Logf("using Tailscale release version %s (from --test-version=%q)", v, *testVersion) } - // Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy - // images). Gokrazy has binaries built-in, so doesn't need compilation. + // Check if any macOS nodes are present; if so, verify prerequisites. + hasMacOS := false + for _, n := range e.nodes { + if n.os.IsMacOS { + hasMacOS = true + break + } + } + if hasMacOS { + if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" { + t.Skip("macOS VM tests require macOS arm64 host") + } + } + + // Determine which GOOS/GOARCH pairs need compiled binaries (non-gokrazy, + // non-macOS images). Gokrazy has binaries built-in. macOS VMs don't use + // compiled binaries (no TTA agent). 
type platform struct{ goos, goarch string } needPlatform := set.Set[platform]{} for _, n := range e.nodes { - if !n.os.IsGokrazy { + if !n.os.IsGokrazy && !n.os.IsMacOS { needPlatform.Add(platform{n.os.GOOS(), n.os.GOARCH()}) } } @@ -438,7 +467,9 @@ func (e *Env) Start() { continue } didOS.Add(n.os.Name) - if n.os.IsGokrazy { + if n.os.IsMacOS { + imageSteps[n.os.Name] = e.AddStep("Prepare macOS Tart image") + } else if n.os.IsGokrazy { imageSteps["gokrazy"] = e.AddStep("Build gokrazy image") } else { imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name)) @@ -446,12 +477,18 @@ func (e *Env) Start() { } vnetStep := e.AddStep("Create virtual network") - qemuSteps := map[string]*Step{} + vmSteps := map[string]*Step{} agentSteps := map[string]*Step{} tsUpSteps := map[string]*Step{} for _, n := range e.nodes { - qemuSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name)) - agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name)) + if n.os.IsMacOS { + vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch macOS VM: %s", n.name)) + } else { + vmSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name)) + } + if !n.noAgent { + agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name)) + } if n.joinTailnet { tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name)) } @@ -485,7 +522,15 @@ func (e *Env) Start() { continue } didOS.Add(n.os.Name) - if n.os.IsGokrazy { + if n.os.IsMacOS { + step := imageSteps[n.os.Name] + eg.Go(func() error { + step.Begin() + ensureTartImage(t) + step.End(nil) + return nil + }) + } else if n.os.IsGokrazy { step := imageSteps["gokrazy"] eg.Go(func() error { step.Begin() @@ -591,7 +636,7 @@ func (e *Env) Start() { // not via the cloud-init HTTP VIP, because network-config must be available // during init-local before systemd-networkd-wait-online blocks. - // Start Unix socket listener. + // Start Unix stream socket listener (for QEMU VMs). 
e.sockAddr = filepath.Join(e.tempDir, "vnet.sock") srv, err := net.Listen("unix", e.sockAddr) if err != nil { @@ -609,18 +654,45 @@ func (e *Env) Start() { } }() - // Launch QEMU processes. + // Start Unix dgram socket listener (for macOS VMs via tailmac). + // Use /tmp/ instead of the test temp dir because Unix socket paths + // are limited to 104 bytes on macOS, and test temp dir paths are long. + if hasMacOS { + e.dgramSockAddr = fmt.Sprintf("/tmp/vmtest-dgram-%d.sock", os.Getpid()) + t.Cleanup(func() { os.Remove(e.dgramSockAddr) }) + dgramAddr, err := net.ResolveUnixAddr("unixgram", e.dgramSockAddr) + if err != nil { + t.Fatalf("resolve dgram addr: %v", err) + } + uc, err := net.ListenUnixgram("unixgram", dgramAddr) + if err != nil { + t.Fatalf("listen unixgram: %v", err) + } + t.Cleanup(func() { uc.Close() }) + go e.server.ServeUnixConn(uc, vnet.ProtocolUnixDGRAM) + } + + // Launch VM processes. for _, n := range e.nodes { - step := qemuSteps[n.name] + step := vmSteps[n.name] step.Begin() - if err := e.startQEMU(n); err != nil { - t.Fatalf("startQEMU(%s): %v", n.name, err) + if n.os.IsMacOS { + if err := e.startTailMacVM(n); err != nil { + t.Fatalf("startTailMacVM(%s): %v", n.name, err) + } + } else { + if err := e.startQEMU(n); err != nil { + t.Fatalf("startQEMU(%s): %v", n.name, err) + } } step.End(nil) } // Set up agent clients and wait for all agents to connect. for _, n := range e.nodes { + if n.noAgent { + continue + } n.agent = e.server.NodeAgentClient(n.vnetNode) n.vnetNode.SetClient(n.agent) } @@ -628,6 +700,9 @@ func (e *Env) Start() { // Wait for agents, then bring up tailscale. var agentEg errgroup.Group for _, n := range e.nodes { + if n.noAgent { + continue + } agentEg.Go(func() error { aStep := agentSteps[n.name] aStep.Begin() @@ -1123,6 +1198,51 @@ func (e *Env) HTTPGet(from *Node, targetURL string) string { return "" } +// Agent returns the node's TTA agent client, or nil if NoAgent is set. 
+func (n *Node) Agent() *vnet.NodeAgentClient {
+	return n.agent
+}
+
+// LANPing pings a LAN IP from the given node using TTA's /ping endpoint.
+// It retries every 2 seconds for up to 2 minutes, which is enough for a
+// macOS VM to boot and acquire a DHCP lease. Each attempt has a 5 second
+// timeout. It returns on the first HTTP 200 response and fatals if from has
+// no agent or if no attempt succeeds before the deadline.
+func (e *Env) LANPing(from *Node, targetIP netip.Addr) {
+	if from.agent == nil {
+		e.t.Fatalf("LANPing: node %s has no agent (NoAgent set?)", from.name)
+	}
+	e.t.Logf("LANPing: %s -> %s", from.name, targetIP)
+
+	reqURL := fmt.Sprintf("http://unused/ping?host=%s", targetIP)
+
+	// pingOnce performs a single request. The request context stays alive
+	// until the response body has been read and closed; cancelling it right
+	// after Do returns would abort the body read.
+	pingOnce := func() (int, []byte, error) {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+		req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
+		if err != nil {
+			e.t.Fatalf("LANPing: %v", err)
+		}
+		res, err := from.agent.HTTPClient.Do(req)
+		if err != nil {
+			return 0, nil, err
+		}
+		defer res.Body.Close()
+		// The body is only used for diagnostics on non-200 responses, so a
+		// read error is deliberately not treated as a ping failure.
+		body, _ := io.ReadAll(res.Body)
+		return res.StatusCode, body, nil
+	}
+
+	deadline := time.Now().Add(2 * time.Minute)
+	for attempt := 0; time.Now().Before(deadline); attempt++ {
+		status, body, err := pingOnce()
+		switch {
+		case err != nil:
+			// Log only every 10th failure to keep the test log readable.
+			if attempt%10 == 0 {
+				e.t.Logf("LANPing attempt %d: %v", attempt+1, err)
+			}
+		case status == 200:
+			e.t.Logf("LANPing: %s -> %s succeeded on attempt %d", from.name, targetIP, attempt+1)
+			return
+		default:
+			if attempt%10 == 0 {
+				e.t.Logf("LANPing attempt %d: status %d, body: %s", attempt+1, status, string(body))
+			}
+		}
+		time.Sleep(2 * time.Second)
+	}
+	e.t.Fatalf("LANPing: %s -> %s timed out after 2 minutes", from.name, targetIP)
+}
+
 // SendTaildropFile sends a file via Taildrop from one node to another.
 // The to node must be on the tailnet. It fatals on error.
func (e *Env) SendTaildropFile(from, to *Node, name string, content []byte) { diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go index 6ca46c717..e1c343977 100644 --- a/tstest/natlab/vmtest/vmtest_test.go +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -16,6 +16,26 @@ import ( "tailscale.com/tstest/natlab/vnet" ) +func TestMacOSAndLinuxCanPing(t *testing.T) { + env := vmtest.New(t) + + lan := env.AddNetwork("192.168.1.1/24") + + linux := env.AddNode("linux", lan, + vmtest.OS(vmtest.Gokrazy), + vmtest.DontJoinTailnet()) + macos := env.AddNode("macos", lan, + vmtest.OS(vmtest.MacOS), + vmtest.DontJoinTailnet(), + vmtest.NoAgent()) + + env.Start() + + // Ping from Linux (which has TTA) to macOS (which just responds to ICMP). + // LANPing retries until the macOS VM has booted and acquired a DHCP lease. + env.LANPing(linux, macos.LanIP(lan)) +} + func TestSubnetRouter(t *testing.T) { testSubnetRouterForOS(t, vmtest.Ubuntu2404) } diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index efe2e1984..4836eea05 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -1077,8 +1077,7 @@ func (s *Server) ServeUnixConn(uc *net.UnixConn, proto Protocol) { n, addr, err := uc.ReadFromUnix(buf) raddr = addr if err != nil { - if s.shutdownCtx.Err() != nil { - // Return without logging. + if s.shutdownCtx.Err() != nil || errors.Is(err, net.ErrClosed) { return } s.logf("ReadFromUnix: %#v", err)