From b9eac14ef93b825e11177622a90440c64ca91052 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Sat, 11 Apr 2026 04:33:48 +0000 Subject: [PATCH] tstest/natlab/vmtest: add web UI for watching VM tests live Add an optional --vmtest-web flag that starts an HTTP server showing a live dashboard for vmtest runs. The dashboard includes: - Step progress tracker showing all test phases (compile, image prep, QEMU launch, agent connect, tailscale up, test-specific steps) with status icons and elapsed times - Per-VM "virtual monitor" cards showing serial console output streamed in realtime via WebSocket - Per-NIC DHCP status (supporting multi-homed VMs like subnet routers) - Per-node Tailscale status (hidden for non-tailnet VMs) - Test status badge (Running/Passed/Failed) with live elapsed timer - Event log showing all lifecycle events chronologically Architecture follows the existing util/eventbus HTMX+WebSocket pattern: the server pushes HTML fragments with hx-swap-oob attributes over a WebSocket, and HTMX routes them to the correct DOM elements by ID. Key components: - vmstatus.go: Step tracker (Begin/End lifecycle), EventBus (pub/sub with history for late joiners), VMEvent types, NodeStatus tracking - web.go: HTTP server, WebSocket handler, template loading, ANSI-to-HTML conversion via robert-nix/ansihtml, deterministic port selection - assets/: HTML templates, CSS, HTMX library (copied from eventbus) - vnet/vnet.go: DHCP event callback on Server for observing DHCP lifecycle - qemu.go: Console log file tailing with manual offset-based reading Usage: go test ./tstest/natlab/vmtest/ --run-vm-tests --vmtest-web=:0 -v When using :0, a deterministic port based on the test name is tried first so re-runs get the same URL, falling back to OS-assigned on conflict. Updates #13038 Change-Id: I45281347b3d7af78ed9f4ff896033984f84dcb4d Signed-off-by: Brad Fitzpatrick --- flake.nix | 2 +- go.mod | 1 + go.mod.sri | 2 +- go.sum | 2 + shell.nix | 2 +- tstest/natlab/vmtest/assets/event.html | 39 +++ tstest/natlab/vmtest/assets/index.html | 111 +++++++++ tstest/natlab/vmtest/assets/style.css | 168 +++++++++++++ tstest/natlab/vmtest/qemu.go | 73 ++++++ tstest/natlab/vmtest/vmstatus.go | 320 +++++++++++++++++++++++++ tstest/natlab/vmtest/vmtest.go | 316 +++++++++++++++++++++++- tstest/natlab/vmtest/vmtest_test.go | 117 +++++++-- tstest/natlab/vmtest/web.go | 189 +++++++++++++++ tstest/natlab/vnet/vnet.go | 19 ++ 14 files changed, 1322 insertions(+), 39 deletions(-) create mode 100644 tstest/natlab/vmtest/assets/event.html create mode 100644 tstest/natlab/vmtest/assets/index.html create mode 100644 tstest/natlab/vmtest/assets/style.css create mode 100644 tstest/natlab/vmtest/vmstatus.go create mode 100644 tstest/natlab/vmtest/web.go diff --git a/flake.nix b/flake.nix index 3febd4990..fa03ceb51 100644 --- a/flake.nix +++ b/flake.nix @@ -163,4 +163,4 @@ }); }; } -# nix-direnv cache busting line: sha256-5uzkG6NQh0znjgE6yV5b01y8bUlTvLqXyAoWbMRQNEY= +# nix-direnv cache busting line: sha256-ruRbOB2W9snyOYY0+6OD5IndI/JJKqrhTuPlBsKikRc= diff --git a/go.mod b/go.mod index 4d2dde8b2..14de608d7 100644 --- a/go.mod +++ b/go.mod @@ -86,6 +86,7 @@ require ( github.com/prometheus/client_golang v1.23.0 github.com/prometheus/common v0.65.0 github.com/prometheus/prometheus v0.49.2-0.20240125131847-c3b8ef1694ff + github.com/robert-nix/ansihtml v1.0.1 github.com/safchain/ethtool v0.3.0 github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e github.com/studio-b12/gowebdav v0.9.0 diff --git a/go.mod.sri b/go.mod.sri index c80164252..dd04e0fa6 100644 --- a/go.mod.sri +++ b/go.mod.sri @@ -1 +1 @@ -sha256-5uzkG6NQh0znjgE6yV5b01y8bUlTvLqXyAoWbMRQNEY= +sha256-ruRbOB2W9snyOYY0+6OD5IndI/JJKqrhTuPlBsKikRc= diff --git a/go.sum b/go.sum index f1ddedbbf..d7cf5f0b7 100644 --- a/go.sum +++ b/go.sum @@ -1019,6 +1019,8 @@ github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRl github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/robert-nix/ansihtml v1.0.1 h1:VTiyQ6/+AxSJoSSLsMecnkh8i0ZqOEdiRl/odOc64fc= +github.com/robert-nix/ansihtml v1.0.1/go.mod h1:CJwclxYaTPc2RfcxtanEACsYuTksh4yDXcNeHHKZINE= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= diff --git a/shell.nix b/shell.nix index d1cbe7154..6658972f3 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-5uzkG6NQh0znjgE6yV5b01y8bUlTvLqXyAoWbMRQNEY= +# nix-direnv cache busting line: sha256-ruRbOB2W9snyOYY0+6OD5IndI/JJKqrhTuPlBsKikRc= diff --git a/tstest/natlab/vmtest/assets/event.html b/tstest/natlab/vmtest/assets/event.html new file mode 100644 index 000000000..70d8e69cf --- /dev/null +++ b/tstest/natlab/vmtest/assets/event.html @@ -0,0 +1,39 @@ +{{if eq .Type "test_status"}} +{{.Message}} ({{.Detail}}) +{{end}} + +{{if eq .Type "step_changed"}} +
+ {{.Step.Status.Icon}} + {{.Step.Name}} + {{formatDuration .Step.Elapsed}} +
+{{end}} + +{{if eq .Type "console_output"}} +
{{ansi .Message}} +
+{{end}} + +{{if eq .Type "dhcp_discover"}} +Discover sent +{{end}} + +{{if eq .Type "dhcp_offer"}} +Offered {{.Detail}} +{{end}} + +{{if eq .Type "dhcp_request"}} +Requesting {{.Detail}} +{{end}} + +{{if eq .Type "dhcp_ack"}} +Got {{.Detail}} +{{end}} + +{{if eq .Type "tailscale"}} +{{.Detail}} +{{end}} + +
{{.Time.Format "15:04:05.000"}} {{if .NodeName}}[{{.NodeName}}] {{end}}{{.Message}}{{if .Detail}} {{.Detail}}{{end}}
+
diff --git a/tstest/natlab/vmtest/assets/index.html b/tstest/natlab/vmtest/assets/index.html new file mode 100644 index 000000000..093cb6a38 --- /dev/null +++ b/tstest/natlab/vmtest/assets/index.html @@ -0,0 +1,111 @@ + + + + + VMTest: {{.TestName}} + + + + + + +

VMTest: {{.TestName}} {{.TestStatus.State}} ({{formatDuration .TestStatus.Elapsed}})

+ +
+

Progress

+ {{range .Steps}} +
+ {{.Status.Icon}} + {{.Name}} + {{if ne .Status.String "pending"}}{{formatDuration .Elapsed}}{{end}} +
+ {{end}} +
+ +
+ {{range $node := .Nodes}} +
+
+ {{$node.Name}} + {{$node.OS}} +
+
+ {{range $i, $nic := $node.NICs}} +
+ DHCP{{if gt (len $node.NICs) 1}} ({{$nic.NetName}}){{end}}: + {{$nic.DHCP}} +
+ {{end}} + {{if $node.JoinsTailnet}} +
+ Tailscale: + {{$node.Tailscale}} +
+ {{end}} +
+
{{range $node.Console}}{{ansi .}} +{{end}}
+
+ {{end}} +
+ +
+

Events

+
+
+ + + + + diff --git a/tstest/natlab/vmtest/assets/style.css b/tstest/natlab/vmtest/assets/style.css new file mode 100644 index 000000000..fff676dd5 --- /dev/null +++ b/tstest/natlab/vmtest/assets/style.css @@ -0,0 +1,168 @@ +/* CSS reset */ +*, *::before, *::after { box-sizing: border-box; } +* { margin: 0; } +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + line-height: 1.5; + background: #1a1a2e; + color: #e0e0e0; + padding: 16px; +} + +h1 { + font-size: 1.4em; + margin-bottom: 16px; + color: #fff; +} + +.test-status { + font-size: 0.7em; + padding: 2px 10px; + border-radius: 4px; + font-weight: bold; + vertical-align: middle; +} + +.test-Running { background: #2563eb; color: #fff; } +.test-Passed { background: #16a34a; color: #fff; } +.test-Failed { background: #dc2626; color: #fff; } + +h2 { + font-size: 1.1em; + margin-bottom: 8px; + color: #ccc; +} + +/* Step progress panel */ +.steps { + background: #16213e; + border: 1px solid #333; + border-radius: 6px; + padding: 12px; + margin-bottom: 16px; +} + +.step { + display: flex; + align-items: center; + gap: 8px; + padding: 4px 8px; + font-family: monospace; + font-size: 13px; + border-radius: 3px; +} + +.step-pending { color: #666; } +.step-running { color: #4af; font-weight: bold; background: rgba(68, 170, 255, 0.1); } +.step-done { color: #4a4; } +.step-failed { color: #f44; font-weight: bold; background: rgba(255, 68, 68, 0.1); } + +.step-icon { width: 1.2em; text-align: center; } +.step-name { flex: 1; } +.step-time { color: #666; font-size: 12px; min-width: 6em; text-align: right; } + +/* VM card grid */ +.vm-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); + gap: 12px; + margin-bottom: 16px; +} + +.vm-card { + background: #16213e; + border: 1px solid #333; + border-radius: 6px; + padding: 12px; +} + +.vm-header { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 8px; +} + +.vm-name { + font-weight: bold; + font-size: 1.1em; + color: #fff; +} + +.vm-os { + font-size: 0.8em; + background: #333; + padding: 1px 6px; + border-radius: 3px; + color: #aaa; +} + +.vm-status { + display: flex; + flex-direction: column; + gap: 2px; + margin-bottom: 8px; + font-family: monospace; + font-size: 13px; +} + +.vm-status-line { + display: flex; + gap: 8px; +} + +.vm-status-label { + color: #888; + min-width: 7em; +} + +.vm-status-value { + color: #4af; +} + +/* Console output */ +.console { + background: #0a0a0a; + color: #ccc; + font-family: "Cascadia Code", "Fira Code", "Consolas", monospace; + font-size: 11px; + line-height: 1.3; + max-height: 300px; + overflow-y: auto; + white-space: pre-wrap; + word-break: break-all; + padding: 8px; + border-radius: 4px; + border: 1px solid #222; +} + +/* Event log */ +.event-log { + background: #16213e; + border: 1px solid #333; + border-radius: 6px; + padding: 12px; +} + +.events { + max-height: 300px; + overflow-y: auto; +} + +.event { + font-family: monospace; + font-size: 12px; + padding: 1px 0; + border-bottom: 1px solid #1a1a2e; +} + +.event-time { color: #666; } +.event-node { color: #4af; font-weight: bold; } +.event-msg { color: #ccc; } +.event-detail { color: #888; } + +.event-dhcp_discover .event-msg, +.event-dhcp_request .event-msg { color: #fa4; } +.event-dhcp_offer .event-msg, +.event-dhcp_ack .event-msg { color: #4f4; } +.event-step_changed .event-msg { color: #aaf; } diff --git a/tstest/natlab/vmtest/qemu.go b/tstest/natlab/vmtest/qemu.go index df56322fa..a2ccd780c 100644 --- a/tstest/natlab/vmtest/qemu.go +++ b/tstest/natlab/vmtest/qemu.go @@ -5,6 +5,7 @@ package vmtest import ( "bytes" + "context" "encoding/json" "fmt" "net" @@ -13,6 +14,7 @@ import ( "path/filepath" "regexp" "strconv" + "strings" "time" "tailscale.com/tstest/natlab/vnet" @@ -163,6 +165,11 @@ func (e *Env) launchQEMU(name, logPath string, args []string) error { } e.t.Logf("launched QEMU for %s (pid %d), log: %s", name, cmd.Process.Pid, logPath) e.qemuProcs = append(e.qemuProcs, cmd) + + // Start tailing the VM console log for the web UI. + if e.ctx != nil { + go e.tailLogFile(e.ctx, name, logPath) + } e.t.Cleanup(func() { cmd.Process.Kill() cmd.Wait() @@ -237,3 +244,69 @@ func qmpQueryHostFwd(sockPath string) (int, error) { } return strconv.Atoi(m[1]) } + +// tailLogFile tails a VM's serial console log file and publishes each line +// as an EventConsoleOutput to the event bus for the web UI. +func (e *Env) tailLogFile(ctx context.Context, name, logPath string) { + // Wait for the file to appear (QEMU may not have created it yet). + var f *os.File + for { + var err error + f, err = os.Open(logPath) + if err == nil { + break + } + select { + case <-ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + } + } + defer f.Close() + + // Read the file in a loop, tracking our position manually. + // We can't use bufio.Scanner because it caches EOF and won't + // pick up new data appended by QEMU after the first EOF. + var buf []byte + var partial string // incomplete line (no trailing newline yet) + readBuf := make([]byte, 4096) + for { + n, err := f.Read(readBuf) + if n > 0 { + buf = append(buf, readBuf[:n]...) + // Split into complete lines. + for { + idx := bytes.IndexByte(buf, '\n') + if idx < 0 { + break + } + line := partial + string(buf[:idx]) + partial = "" + buf = buf[idx+1:] + // Strip trailing \r from serial consoles. + line = strings.TrimRight(line, "\r") + if line == "" { + continue + } + e.appendConsoleLine(name, line) + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventConsoleOutput, + Message: line, + }) + } + if len(buf) > 0 { + partial = string(buf) + buf = buf[:0] + } + } + if err != nil || n == 0 { + // EOF or error — wait for more data. + select { + case <-ctx.Done(): + return + case <-time.After(100 * time.Millisecond): + } + } + } +} diff --git a/tstest/natlab/vmtest/vmstatus.go b/tstest/natlab/vmtest/vmstatus.go new file mode 100644 index 000000000..4b855e721 --- /dev/null +++ b/tstest/natlab/vmtest/vmstatus.go @@ -0,0 +1,320 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "fmt" + "sync" + "time" +) + +// StepStatus is the state of a declared test step. +type StepStatus int + +const ( + StepPending StepStatus = iota // not yet started + StepRunning // Begin called + StepDone // End(nil) called + StepFailed // End(non-nil) called +) + +func (s StepStatus) String() string { + switch s { + case StepPending: + return "pending" + case StepRunning: + return "running" + case StepDone: + return "done" + case StepFailed: + return "failed" + } + return fmt.Sprintf("StepStatus(%d)", int(s)) +} + +// Icon returns a Unicode icon for the step status. +func (s StepStatus) Icon() string { + switch s { + case StepPending: + return "○" + case StepRunning: + return "◉" + case StepDone: + return "✓" + case StepFailed: + return "✗" + } + return "?" +} + +// Step is a declared stage of a test, created by [Env.AddStep]. +// The web UI shows all steps from the start, tracking their progress. +type Step struct { + mu sync.Mutex + name string + index int // 0-based position in Env.steps + env *Env + status StepStatus + err error + started time.Time + ended time.Time +} + +// Name returns the step's display name. +func (s *Step) Name() string { return s.name } + +// Index returns the step's 0-based position. +func (s *Step) Index() int { return s.index } + +// Status returns the current status. +func (s *Step) Status() StepStatus { + s.mu.Lock() + defer s.mu.Unlock() + return s.status +} + +// Err returns the error if the step failed, or nil. +func (s *Step) Err() error { + s.mu.Lock() + defer s.mu.Unlock() + return s.err +} + +// Elapsed returns how long the step has been running (if running) +// or how long it took (if done/failed). Returns 0 if pending. +func (s *Step) Elapsed() time.Duration { + s.mu.Lock() + defer s.mu.Unlock() + if s.started.IsZero() { + return 0 + } + if !s.ended.IsZero() { + return s.ended.Sub(s.started) + } + return time.Since(s.started) +} + +// Begin marks the step as running. Publishes an event to the web UI. +func (s *Step) Begin() { + s.mu.Lock() + if s.status != StepPending { + s.mu.Unlock() + panic(fmt.Sprintf("Step %q: Begin called in state %s", s.name, s.status)) + } + s.started = time.Now() + s.status = StepRunning + s.mu.Unlock() + s.env.publishStepChange(s) +} + +// End marks the step as done (err == nil) or failed (err != nil). +// It publishes a status change event to the web UI. +// It does not call t.Fatalf; callers should handle the error as appropriate +// (return it from errgroup, call t.Fatalf on the test goroutine, etc). +func (s *Step) End(err error) { + s.mu.Lock() + if s.status != StepRunning { + s.mu.Unlock() + panic(fmt.Sprintf("Step %q: End called in state %s", s.name, s.status)) + } + s.ended = time.Now() + if err != nil { + s.status = StepFailed + s.err = err + } else { + s.status = StepDone + } + s.mu.Unlock() + s.env.publishStepChange(s) +} + +// EventType identifies the kind of event published to the EventBus. +type EventType string + +const ( + EventStepChanged EventType = "step_changed" // a Step changed status + EventConsoleOutput EventType = "console_output" // serial console line + EventDHCPDiscover EventType = "dhcp_discover" // VM sent DHCP Discover + EventDHCPOffer EventType = "dhcp_offer" // server sent DHCP Offer + EventDHCPRequest EventType = "dhcp_request" // VM sent DHCP Request + EventDHCPAck EventType = "dhcp_ack" // server sent DHCP Ack + EventTailscale EventType = "tailscale" // Tailscale status change + EventTestStatus EventType = "test_status" // test Running/Passed/Failed +) + +// TestStatus tracks whether the overall test is running, passed, or failed. +type TestStatus struct { + mu sync.Mutex + state string // "Running", "Passed", "Failed" + started time.Time + ended time.Time +} + +func newTestStatus() *TestStatus { + return &TestStatus{state: "Running", started: time.Now()} +} + +// State returns the current test state. +func (ts *TestStatus) State() string { + ts.mu.Lock() + defer ts.mu.Unlock() + return ts.state +} + +// Elapsed returns total test duration. +func (ts *TestStatus) Elapsed() time.Duration { + ts.mu.Lock() + defer ts.mu.Unlock() + if !ts.ended.IsZero() { + return ts.ended.Sub(ts.started) + } + return time.Since(ts.started) +} + +// StartUnixMilli returns the test start time as Unix milliseconds, +// for the client-side elapsed timer. +func (ts *TestStatus) StartUnixMilli() int64 { + ts.mu.Lock() + defer ts.mu.Unlock() + return ts.started.UnixMilli() +} + +// finish marks the test as passed or failed. +func (ts *TestStatus) finish(failed bool) { + ts.mu.Lock() + defer ts.mu.Unlock() + ts.ended = time.Now() + if failed { + ts.state = "Failed" + } else { + ts.state = "Passed" + } +} + +// VMEvent is a single event published to the [EventBus]. +type VMEvent struct { + Time time.Time + NodeName string // "" for global events + Type EventType + Message string // human-readable description + Detail string // e.g. IP address, node key + Step *Step // non-nil for EventStepChanged + NIC int // NIC index for DHCP events (0-based); -1 if not applicable +} + +// NICStatus is the DHCP state for one NIC on a node. +type NICStatus struct { + NetName string // human label like "192.168.1.0/24" or "10.0.0.0/24" + DHCP string // "waiting", "Discover sent", "Got 10.0.0.101", etc. +} + +// NodeStatus tracks the current DHCP and Tailscale state of a VM node +// for rendering on the web UI's initial page load. +type NodeStatus struct { + Name string + OS string + NICs []NICStatus // one per NIC; index matches NIC index + JoinsTailnet bool // whether this node runs Tailscale + Tailscale string // "--", "Up (100.64.0.1)", etc. + Console []string // recent console output lines (ring buffer) +} + +const maxConsoleLines = 200 + +const ( + eventBusHistorySize = 500 + subscriberChannelSize = 1000 +) + +// EventBus broadcasts VMEvents to subscribers and keeps a history for +// late joiners. It is safe for concurrent use. +type EventBus struct { + mu sync.Mutex + history []VMEvent + subscribers map[*subscriber]struct{} +} + +func newEventBus() *EventBus { + return &EventBus{ + subscribers: make(map[*subscriber]struct{}), + } +} + +// Publish sends an event to all subscribers and appends it to the history. +// Non-blocking: slow subscribers are skipped. +func (b *EventBus) Publish(ev VMEvent) { + if ev.Time.IsZero() { + ev.Time = time.Now() + } + b.mu.Lock() + defer b.mu.Unlock() + b.history = append(b.history, ev) + if len(b.history) > eventBusHistorySize { + // Trim old events. + copy(b.history, b.history[len(b.history)-eventBusHistorySize:]) + b.history = b.history[:eventBusHistorySize] + } + for sub := range b.subscribers { + select { + case sub.ch <- ev: + default: + // Slow consumer, skip. + } + } +} + +// Subscribe returns a new subscriber that receives the event history +// followed by live events. +func (b *EventBus) Subscribe() *subscriber { + b.mu.Lock() + defer b.mu.Unlock() + sub := &subscriber{ + bus: b, + ch: make(chan VMEvent, subscriberChannelSize), + done: make(chan struct{}), + } + // Send history. + for _, ev := range b.history { + select { + case sub.ch <- ev: + default: + } + } + b.subscribers[sub] = struct{}{} + return sub +} + +func (b *EventBus) unsubscribe(sub *subscriber) { + b.mu.Lock() + defer b.mu.Unlock() + delete(b.subscribers, sub) +} + +// subscriber receives events from an [EventBus]. +type subscriber struct { + bus *EventBus + ch chan VMEvent + done chan struct{} + once sync.Once +} + +// Events returns the channel of events. Closed when Close is called. +func (s *subscriber) Events() <-chan VMEvent { + return s.ch +} + +// Close unsubscribes and closes the event channel. +func (s *subscriber) Close() { + s.once.Do(func() { + if s.bus != nil { + s.bus.unsubscribe(s) + } + close(s.done) + }) +} + +// Done returns a channel that's closed when Close is called. +func (s *subscriber) Done() <-chan struct{} { + return s.done +} diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go index fbc6652e8..876778760 100644 --- a/tstest/natlab/vmtest/vmtest.go +++ b/tstest/natlab/vmtest/vmtest.go @@ -31,6 +31,7 @@ import ( "testing" "time" + "github.com/google/gopacket/layers" "golang.org/x/sync/errgroup" "tailscale.com/client/local" "tailscale.com/ipn" @@ -67,6 +68,15 @@ type Env struct { gokrazyKernel string // path to gokrazy kernel qemuProcs []*exec.Cmd // launched QEMU processes + + // Web UI support. + ctx context.Context // cancelled when test ends + eventBus *EventBus + testStatus *TestStatus + steps []*Step + + nodeStatusMu sync.Mutex + nodeStatus map[string]*NodeStatus // keyed by node name } // logVerbosef logs a message only when --verbose-vm-debug is set. @@ -77,6 +87,145 @@ func (e *Env) logVerbosef(format string, args ...any) { } } +// AddStep declares an expected stage of the test. The web UI shows all steps +// from the start, tracking their progress. Call before or during the test. +// Returns a *Step whose Begin/End methods drive the progress display. +func (e *Env) AddStep(name string) *Step { + s := &Step{ + name: name, + index: len(e.steps), + env: e, + } + e.steps = append(e.steps, s) + return s +} + +// Steps returns all declared steps in order. +func (e *Env) Steps() []*Step { + return e.steps +} + +// publishStepChange publishes a step status change event. +func (e *Env) publishStepChange(s *Step) { + e.eventBus.Publish(VMEvent{ + Type: EventStepChanged, + Message: fmt.Sprintf("%s %s", s.Status().Icon(), s.name), + Step: s, + }) +} + +// initNodeStatus initializes the NodeStatus for all nodes. Called after +// AddNode but before Start so the web UI can render them. +func (e *Env) initNodeStatus() { + e.nodeStatusMu.Lock() + defer e.nodeStatusMu.Unlock() + for _, n := range e.nodes { + nics := make([]NICStatus, len(n.nets)) + for i := range n.nets { + nics[i] = NICStatus{ + NetName: e.nicLabel(n, i), + DHCP: "waiting", + } + } + e.nodeStatus[n.name] = &NodeStatus{ + Name: n.name, + OS: n.os.Name, + NICs: nics, + JoinsTailnet: n.joinTailnet, + Tailscale: "--", + } + } +} + +// nicLabel returns a short human-readable label for a node's i-th NIC. +// After Start(), we can use the assigned LAN IP. Before that, we use "NIC N". +func (e *Env) nicLabel(n *Node, i int) string { + if n.vnetNode != nil { + ip := n.vnetNode.LanIP(n.nets[i]) + if ip.IsValid() { + return ip.String() + } + } + return fmt.Sprintf("NIC %d", i) +} + +// getNodeStatus returns the current status for a node. +func (e *Env) getNodeStatus(name string) NodeStatus { + e.nodeStatusMu.Lock() + defer e.nodeStatusMu.Unlock() + ns := e.nodeStatus[name] + if ns == nil { + return NodeStatus{Name: name, Tailscale: "--"} + } + return *ns +} + +// setNodeDHCP updates the DHCP status for a specific NIC on a node. +func (e *Env) setNodeDHCP(name string, nicIdx int, status string) { + e.nodeStatusMu.Lock() + ns := e.nodeStatus[name] + if ns != nil && nicIdx < len(ns.NICs) { + ns.NICs[nicIdx].DHCP = status + } + e.nodeStatusMu.Unlock() +} + +// setNodeTailscale updates the Tailscale status for a node and publishes +// an event so the web UI updates via WebSocket. +func (e *Env) setNodeTailscale(name, status string) { + e.nodeStatusMu.Lock() + ns := e.nodeStatus[name] + if ns != nil { + ns.Tailscale = status + } + e.nodeStatusMu.Unlock() + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventTailscale, + Message: "Tailscale: " + status, + Detail: status, + }) +} + +// appendConsoleLine adds a line to a node's console buffer. +func (e *Env) appendConsoleLine(name, line string) { + e.nodeStatusMu.Lock() + ns := e.nodeStatus[name] + if ns != nil { + ns.Console = append(ns.Console, line) + if len(ns.Console) > maxConsoleLines { + ns.Console = ns.Console[len(ns.Console)-maxConsoleLines:] + } + } + e.nodeStatusMu.Unlock() +} + +// nicIndexForMAC returns the NIC index (0-based) for a given MAC on a node. +// Returns -1 if not found. +func (e *Env) nicIndexForMAC(name string, mac vnet.MAC) int { + for _, n := range e.nodes { + if n.name != name { + continue + } + for i := range n.nets { + if n.vnetNode.NICMac(i) == mac { + return i + } + } + } + return -1 +} + +// nodeNameByNum returns the node name for a given vnet node number. +func (e *Env) nodeNameByNum(num int) string { + for _, n := range e.nodes { + if n.num == num { + return n.name + } + } + return fmt.Sprintf("node%d", num) +} + // New creates a new test environment. It skips the test if --run-vm-tests is not set. func New(t testing.TB) *Env { if !*runVMTests { @@ -84,11 +233,23 @@ func New(t testing.TB) *Env { } tempDir := t.TempDir() - return &Env{ - t: t, - tempDir: tempDir, - binDir: filepath.Join(tempDir, "bin"), + e := &Env{ + t: t, + tempDir: tempDir, + binDir: filepath.Join(tempDir, "bin"), + eventBus: newEventBus(), + testStatus: newTestStatus(), + nodeStatus: make(map[string]*NodeStatus), } + t.Cleanup(func() { + e.testStatus.finish(t.Failed()) + e.eventBus.Publish(VMEvent{ + Type: EventTestStatus, + Message: e.testStatus.State(), + Detail: formatDuration(e.testStatus.Elapsed()), + }) + }) + return e } // AddNetwork creates a new virtual network. Arguments follow the same pattern as @@ -197,6 +358,11 @@ func (e *Env) Start() { t := e.t ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) t.Cleanup(cancel) + e.ctx = ctx + + // Initialize node status and start web UI as early as possible. + e.initNodeStatus() + e.maybeStartWebServer() if err := os.MkdirAll(e.binDir, 0755); err != nil { t.Fatal(err) @@ -223,27 +389,94 @@ func (e *Env) Start() { } } - // Compile binaries and download/build images in parallel. - // Any failure cancels the others via the errgroup context. - eg, egCtx := errgroup.WithContext(ctx) + // Declare framework steps for the web UI. + // User-declared steps (from AddStep before Start) get moved to the end + // so framework steps (compile, image, QEMU, etc.) come first. + userSteps := e.steps + e.steps = nil + + compileSteps := map[platform]*Step{} for _, p := range needPlatform.Slice() { - eg.Go(func() error { - return e.compileBinariesForOS(egCtx, p.goos, p.goarch) - }) + compileSteps[p] = e.AddStep(fmt.Sprintf("Compile %s_%s binaries", p.goos, p.goarch)) } - didOS := set.Set[string]{} // dedup by image name + imageSteps := map[string]*Step{} // keyed by OS name + didOS := set.Set[string]{} // dedup by image name for _, n := range e.nodes { if didOS.Contains(n.os.Name) { continue } didOS.Add(n.os.Name) if n.os.IsGokrazy { + imageSteps["gokrazy"] = e.AddStep("Build gokrazy image") + } else { + imageSteps[n.os.Name] = e.AddStep(fmt.Sprintf("Prepare %s image", n.os.Name)) + } + } + vnetStep := e.AddStep("Create virtual network") + + qemuSteps := map[string]*Step{} + agentSteps := map[string]*Step{} + tsUpSteps := map[string]*Step{} + for _, n := range e.nodes { + qemuSteps[n.name] = e.AddStep(fmt.Sprintf("Launch QEMU: %s", n.name)) + agentSteps[n.name] = e.AddStep(fmt.Sprintf("Wait for agent: %s", n.name)) + if n.joinTailnet { + tsUpSteps[n.name] = e.AddStep(fmt.Sprintf("Tailscale up: %s", n.name)) + } + } + + // Re-append user-declared steps after all framework steps. + for _, s := range userSteps { + s.index = len(e.steps) + e.steps = append(e.steps, s) + } + + // Compile binaries and download/build images in parallel. + // Any failure cancels the others via the errgroup context. + eg, egCtx := errgroup.WithContext(ctx) + for _, p := range needPlatform.Slice() { + step := compileSteps[p] + eg.Go(func() error { + step.Begin() + err := e.compileBinariesForOS(egCtx, p.goos, p.goarch) + if err != nil { + step.End(err) + return err + } + step.End(nil) + return nil + }) + } + didOS = set.Set[string]{} // reset for second pass + for _, n := range e.nodes { + if didOS.Contains(n.os.Name) { + continue + } + didOS.Add(n.os.Name) + if n.os.IsGokrazy { + step := imageSteps["gokrazy"] eg.Go(func() error { - return e.ensureGokrazy(egCtx) + step.Begin() + err := e.ensureGokrazy(egCtx) + if err != nil { + step.End(err) + return err + } + step.End(nil) + return nil }) } else { + step := imageSteps[n.os.Name] + osImg := n.os eg.Go(func() error { - return ensureImage(egCtx, n.os) + step.Begin() + err := ensureImage(egCtx, osImg) + if err != nil { + step.End(err) + return err + } + step.End(nil) + return nil }) } } @@ -252,6 +485,7 @@ func (e *Env) Start() { } // Create the vnet server. + vnetStep.Begin() var err error e.server, err = vnet.New(&e.cfg) if err != nil { @@ -259,6 +493,50 @@ func (e *Env) Start() { } t.Cleanup(func() { e.server.Close() }) + // Register DHCP event callback for the web UI. + e.server.SetDHCPCallback(func(mac vnet.MAC, nodeNum int, msgType layers.DHCPMsgType, ip netip.Addr) { + name := e.nodeNameByNum(nodeNum) + nicIdx := e.nicIndexForMAC(name, mac) + ipStr := ip.String() + switch msgType { + case layers.DHCPMsgTypeDiscover: + e.setNodeDHCP(name, nicIdx, "Discover sent") + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventDHCPDiscover, + Message: "DHCP Discover sent", + NIC: nicIdx, + }) + case layers.DHCPMsgTypeOffer: + e.setNodeDHCP(name, nicIdx, "Offered "+ipStr) + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventDHCPOffer, + Message: "DHCP Offer received", + Detail: ipStr, + NIC: nicIdx, + }) + case layers.DHCPMsgTypeRequest: + e.setNodeDHCP(name, nicIdx, "Requesting "+ipStr) + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventDHCPRequest, + Message: "DHCP Request sent", + Detail: ipStr, + NIC: nicIdx, + }) + case layers.DHCPMsgTypeAck: + e.setNodeDHCP(name, nicIdx, "Got "+ipStr) + e.eventBus.Publish(VMEvent{ + NodeName: name, + Type: EventDHCPAck, + Message: "DHCP Ack: got " + ipStr, + Detail: ipStr, + NIC: nicIdx, + }) + } + }) + // Register compiled binaries with the file server VIP. // Binaries are registered at _/ (e.g. "linux_amd64/tta"). for _, p := range needPlatform.Slice() { @@ -271,6 +549,7 @@ func (e *Env) Start() { e.server.RegisterFile(dir+"/"+name, data) } } + vnetStep.End(nil) // Cloud-init config is delivered via local seed ISOs (created in startCloudQEMU), // not via the cloud-init HTTP VIP, because network-config must be available @@ -296,9 +575,12 @@ func (e *Env) Start() { // Launch QEMU processes. for _, n := range e.nodes { + step := qemuSteps[n.name] + step.Begin() if err := e.startQEMU(n); err != nil { t.Fatalf("startQEMU(%s): %v", n.name, err) } + step.End(nil) } // Set up agent clients and wait for all agents to connect. @@ -311,12 +593,15 @@ func (e *Env) Start() { var agentEg errgroup.Group for _, n := range e.nodes { agentEg.Go(func() error { + aStep := agentSteps[n.name] + aStep.Begin() t.Logf("[%s] waiting for agent...", n.name) st, err := n.agent.Status(ctx) if err != nil { return fmt.Errorf("[%s] agent status: %w", n.name, err) } t.Logf("[%s] agent connected, backend state: %s", n.name, st.BackendState) + aStep.End(nil) if n.vnetNode.HostFirewall() { if err := n.agent.EnableHostFirewall(ctx); err != nil { @@ -325,6 +610,8 @@ func (e *Env) Start() { } if n.joinTailnet { + tsStep := tsUpSteps[n.name] + tsStep.Begin() if err := e.tailscaleUp(ctx, n); err != nil { return fmt.Errorf("[%s] tailscale up: %w", n.name, err) } @@ -335,7 +622,10 @@ func (e *Env) Start() { if st.BackendState != "Running" { return fmt.Errorf("[%s] state = %q, want Running", n.name, st.BackendState) } + ips := fmt.Sprintf("%v", st.Self.TailscaleIPs) + e.setNodeTailscale(n.name, "Running "+ips) t.Logf("[%s] up with %v", n.name, st.Self.TailscaleIPs) + tsStep.End(nil) } return nil diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go index fd83f3fbc..68d3db1d6 100644 --- a/tstest/natlab/vmtest/vmtest_test.go +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -37,13 +37,23 @@ func testSubnetRouterForOS(t testing.TB, srOS vmtest.OSImage) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) - env.Start() - env.ApproveRoutes(sr, "10.0.0.0/24") + // Declare test-specific steps for the web UI. + approveStep := env.AddStep("Approve subnet routes") + httpStep := env.AddStep("HTTP GET through subnet router") + env.Start() + + approveStep.Begin() + env.ApproveRoutes(sr, "10.0.0.0/24") + approveStep.End(nil) + + httpStep.Begin() body := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", backend.LanIP(internalNet))) if !strings.Contains(body, "Hello world I am backend") { + httpStep.End(fmt.Errorf("got %q", body)) t.Fatalf("got %q", body) } + httpStep.End(nil) } func TestSiteToSite(t *testing.T) { @@ -95,9 +105,17 @@ func testSiteToSite(t *testing.T, srOS vmtest.OSImage) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) + // Declare test-specific steps for the web UI. + approveStep := env.AddStep("Approve subnet routes (sr-a, sr-b)") + staticRouteStep := env.AddStep("Add static routes on backends") + httpStep := env.AddStep("HTTP GET through site-to-site") + env.Start() + + approveStep.Begin() env.ApproveRoutes(srA, "10.1.0.0/24") env.ApproveRoutes(srB, "10.2.0.0/24") + approveStep.End(nil) // Add static routes on the backends so that traffic to the remote site's // subnet goes through the local subnet router. This mirrors how a real @@ -107,16 +125,20 @@ func testSiteToSite(t *testing.T, srOS vmtest.OSImage) { t.Logf("sr-a LAN IP: %s, sr-b LAN IP: %s", srALanIP, srBLanIP) t.Logf("backend-a LAN IP: %s, backend-b LAN IP: %s", backendA.LanIP(lanA), backendB.LanIP(lanB)) + staticRouteStep.Begin() env.AddRoute(backendA, "10.2.0.0/24", srALanIP) env.AddRoute(backendB, "10.1.0.0/24", srBLanIP) + staticRouteStep.End(nil) // Make an HTTP request from backend-a to backend-b through the subnet routers. // TTA's /http-get falls back to direct dial on non-Tailscale nodes. + httpStep.Begin() backendBIP := backendB.LanIP(lanB) body := env.HTTPGet(backendA, fmt.Sprintf("http://%s:8080/", backendBIP)) t.Logf("response: %s", body) if !strings.Contains(body, "Hello world I am backend-b") { + httpStep.End(fmt.Errorf("expected response from backend-b, got %q", body)) t.Fatalf("expected response from backend-b, got %q", body) } @@ -124,8 +146,10 @@ func testSiteToSite(t *testing.T, srOS vmtest.OSImage) { // backend-b should see backend-a's LAN IP as the source, not sr-b's LAN IP. backendAIP := backendA.LanIP(lanA).String() if !strings.Contains(body, "from "+backendAIP) { + httpStep.End(fmt.Errorf("source IP not preserved: expected %q in response, got %q", backendAIP, body)) t.Fatalf("source IP not preserved: expected %q in response, got %q", backendAIP, body) } + httpStep.End(nil) } // TestInterNetworkTCP verifies that vnet routes raw TCP between simulated @@ -151,16 +175,23 @@ func TestInterNetworkTCP(t *testing.T) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) + // Declare test-specific steps for the web UI. + httpStep := env.AddStep("HTTP GET across networks via NAT") + env.Start() + httpStep.Begin() body := env.HTTPGet(client, fmt.Sprintf("http://%s:8080/", webWAN)) t.Logf("response: %s", body) if !strings.Contains(body, "Hello world I am webserver") { + httpStep.End(fmt.Errorf("unexpected response: %q", body)) t.Fatalf("unexpected response: %q", body) } if !strings.Contains(body, "from "+clientWAN) { + httpStep.End(fmt.Errorf("expected source %q in response, got %q", clientWAN, body)) t.Fatalf("expected source %q in response, got %q", clientWAN, body) } + httpStep.End(nil) } // TestSubnetRouterPublicIP verifies that toggling --accept-routes on the @@ -198,33 +229,45 @@ func TestSubnetRouterPublicIP(t *testing.T) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) + // Declare test-specific steps for the web UI. + approveStep := env.AddStep("Approve subnet route (public IP)") + checkOn1Step := env.AddStep("HTTP GET (accept-routes=on)") + checkOffStep := env.AddStep("HTTP GET (accept-routes=off)") + checkOn2Step := env.AddStep("HTTP GET (accept-routes=on, again)") + env.Start() // ApproveRoutes also turns on RouteAll on the client. + approveStep.Begin() env.ApproveRoutes(sr, webRoute) + approveStep.End(nil) webURL := fmt.Sprintf("http://%s:8080/", webWAN) - check := func(label, wantSrc string) { + check := func(step *vmtest.Step, label, wantSrc string) { t.Helper() + step.Begin() body := env.HTTPGet(client, webURL) t.Logf("[%s] response: %s", label, body) if !strings.Contains(body, "Hello world I am webserver") { + step.End(fmt.Errorf("[%s] unexpected webserver response: %q", label, body)) t.Fatalf("[%s] unexpected webserver response: %q", label, body) } if !strings.Contains(body, "from "+wantSrc) { + step.End(fmt.Errorf("[%s] expected source %q in response, got %q", label, wantSrc, body)) t.Fatalf("[%s] expected source %q in response, got %q", label, wantSrc, body) } + step.End(nil) } // accept-routes=on (set by ApproveRoutes): traffic flows via the subnet router. - check("accept-routes=on", routerWAN) + check(checkOn1Step, "accept-routes=on", routerWAN) // accept-routes=off: client dials the webserver directly. env.SetAcceptRoutes(client, false) - check("accept-routes=off", clientWAN) + check(checkOffStep, "accept-routes=off", clientWAN) // Toggle back on to confirm the transition works in both directions. env.SetAcceptRoutes(client, true) - check("accept-routes=on (again)", routerWAN) + check(checkOn2Step, "accept-routes=on (again)", routerWAN) } // TestSubnetRouterAndExitNode checks how the subnet router and exit node @@ -266,13 +309,8 @@ func TestSubnetRouterAndExitNode(t *testing.T) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) - env.Start() - env.ApproveRoutes(sr, webRoute) - env.ApproveRoutes(exit, "0.0.0.0/0", "::/0") - // Don't let the exit node itself forward via the subnet router: when the - // client is using the exit node only, we want the exit node to egress to - // the simulated internet directly so the webserver sees the exit's WAN. - env.SetAcceptRoutes(exit, false) + // Declare test-specific steps for the web UI. + approveStep := env.AddStep("Approve subnet & exit routes") webURL := fmt.Sprintf("http://%s:8080/", webWAN) tests := []struct { @@ -280,25 +318,44 @@ func TestSubnetRouterAndExitNode(t *testing.T) { exit *vmtest.Node subnet bool wantSrc string + step *vmtest.Step }{ - {"exit-off,subnet-off", nil, false, clientWAN}, - {"exit-off,subnet-on", nil, true, routerWAN}, - {"exit-on,subnet-off", exit, false, exitWAN}, + {"exit-off,subnet-off", nil, false, clientWAN, nil}, + {"exit-off,subnet-on", nil, true, routerWAN, nil}, + {"exit-on,subnet-off", exit, false, exitWAN, nil}, // More-specific 5.0.0.0/24 from sr beats 0.0.0.0/0 from exit. - {"exit-on,subnet-on", exit, true, routerWAN}, + {"exit-on,subnet-on", exit, true, routerWAN, nil}, } + for i := range tests { + tests[i].step = env.AddStep("HTTP GET: " + tests[i].name) + } + + env.Start() + approveStep.Begin() + env.ApproveRoutes(sr, webRoute) + env.ApproveRoutes(exit, "0.0.0.0/0", "::/0") + // Don't let the exit node itself forward via the subnet router: when the + // client is using the exit node only, we want the exit node to egress to + // the simulated internet directly so the webserver sees the exit's WAN. + env.SetAcceptRoutes(exit, false) + approveStep.End(nil) + for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { + tc.step.Begin() env.SetExitNode(client, tc.exit) env.SetAcceptRoutes(client, tc.subnet) body := env.HTTPGet(client, webURL) t.Logf("response: %s", body) if !strings.Contains(body, "Hello world I am webserver") { + tc.step.End(fmt.Errorf("unexpected webserver response: %q", body)) t.Fatalf("unexpected webserver response: %q", body) } if !strings.Contains(body, "from "+tc.wantSrc) { + tc.step.End(fmt.Errorf("expected source %q in response, got %q", tc.wantSrc, body)) t.Fatalf("expected source %q in response, got %q", tc.wantSrc, body) } + tc.step.End(nil) }) } } @@ -342,31 +399,45 @@ func TestExitNode(t *testing.T) { vmtest.DontJoinTailnet(), vmtest.WebServer(8080)) - env.Start() - env.ApproveRoutes(exit1, "0.0.0.0/0", "::/0") - env.ApproveRoutes(exit2, "0.0.0.0/0", "::/0") + // Declare test-specific steps for the web UI. + approveStep := env.AddStep("Approve exit-node routes (exit1, exit2)") webURL := fmt.Sprintf("http://%s:8080/", webWAN) tests := []struct { name string // subtest name exit *vmtest.Node wantSrc string + step *vmtest.Step }{ - {"off", nil, clientWAN}, - {"exit1", exit1, exit1WAN}, - {"exit2", exit2, exit2WAN}, + {"off", nil, clientWAN, nil}, + {"exit1", exit1, exit1WAN, nil}, + {"exit2", exit2, exit2WAN, nil}, } + for i := range tests { + tests[i].step = env.AddStep("HTTP GET: exit=" + tests[i].name) + } + + env.Start() + approveStep.Begin() + env.ApproveRoutes(exit1, "0.0.0.0/0", "::/0") + env.ApproveRoutes(exit2, "0.0.0.0/0", "::/0") + approveStep.End(nil) + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + tt.step.Begin() env.SetExitNode(client, tt.exit) body := env.HTTPGet(client, webURL) t.Logf("response: %s", body) if !strings.Contains(body, "Hello world I am webserver") { + tt.step.End(fmt.Errorf("unexpected webserver response: %q", body)) t.Fatalf("unexpected webserver response: %q", body) } if !strings.Contains(body, "from "+tt.wantSrc) { + tt.step.End(fmt.Errorf("expected source %q in response, got %q", tt.wantSrc, body)) t.Fatalf("expected source %q in response, got %q", tt.wantSrc, body) } + tt.step.End(nil) }) } } diff --git a/tstest/natlab/vmtest/web.go b/tstest/natlab/vmtest/web.go new file mode 100644 index 000000000..ddd929752 --- /dev/null +++ b/tstest/natlab/vmtest/web.go @@ -0,0 +1,189 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package vmtest + +import ( + "embed" + "flag" + "fmt" + "hash/crc32" + "html/template" + "io" + "io/fs" + "net" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/coder/websocket" + "github.com/robert-nix/ansihtml" +) + +var vmtestWeb = flag.String("vmtest-web", "", "listen address for vmtest web UI (e.g. :0, localhost:0, :8080)") + +//go:embed assets/*.html +var templatesSrc embed.FS + +//go:embed assets/*.css +var staticAssets embed.FS + +var tmpl = sync.OnceValue(func() *template.Template { + d, err := fs.Sub(templatesSrc, "assets") + if err != nil { + panic(fmt.Errorf("getting vmtest web templates subdir: %w", err)) + } + return template.Must(template.New("").Funcs(template.FuncMap{ + "formatDuration": formatDuration, + "ansi": ansiToHTML, + }).ParseFS(d, "*")) +}) + +// ansiToHTML converts a string with ANSI escape sequences to HTML with +// inline styles. Returns template.HTML so html/template doesn't double-escape it. +func ansiToHTML(s string) template.HTML { + return template.HTML(ansihtml.ConvertToHTML([]byte(s))) +} + +// formatDuration returns a human-readable duration like "1.2s" or "45.3s". +func formatDuration(d time.Duration) string { + if d < time.Second { + return fmt.Sprintf("%dms", d.Milliseconds()) + } + return fmt.Sprintf("%.1fs", d.Seconds()) +} + +// deterministicPort returns a deterministic port in the range [20000, 40000) +// based on the test name, so re-running the same test gets the same URL. +func deterministicPort(testName string) int { + return int(crc32.ChecksumIEEE([]byte(testName)))%20000 + 20000 +} + +// listenWeb listens on the given address. If the port is 0, it first tries a +// deterministic port based on the test name so re-runs get the same URL. +// Falls back to :0 (OS-assigned) on any listen error. +func (e *Env) listenWeb(addr string) (net.Listener, error) { + host, port, _ := net.SplitHostPort(addr) + if port == "0" { + detPort := deterministicPort(e.t.Name()) + detAddr := net.JoinHostPort(host, fmt.Sprintf("%d", detPort)) + if ln, err := net.Listen("tcp", detAddr); err == nil { + return ln, nil + } + // Deterministic port busy; fall back to OS-assigned. + } + return net.Listen("tcp", addr) +} + +// maybeStartWebServer starts the web UI if --vmtest-web is set. +// Called at the very top of Env.Start(), before compilation or image downloads. +func (e *Env) maybeStartWebServer() { + addr := *vmtestWeb + if addr == "" { + return + } + + ln, err := e.listenWeb(addr) + if err != nil { + e.t.Fatalf("vmtest-web listen: %v", err) + } + e.t.Cleanup(func() { ln.Close() }) + + actualAddr := ln.Addr().(*net.TCPAddr) + + host, _, _ := net.SplitHostPort(addr) + if host == "" || host == "0.0.0.0" || host == "::" { + hostname, err := os.Hostname() + if err != nil { + hostname = "localhost" + } + e.t.Logf("Status at http://%s:%d/", hostname, actualAddr.Port) + } else { + e.t.Logf("Status at http://%s/", actualAddr.String()) + } + + mux := http.NewServeMux() + mux.HandleFunc("GET /", e.serveIndex) + mux.HandleFunc("GET /ws", e.serveWebSocket) + mux.HandleFunc("GET /style.css", serveStaticAsset("style.css")) + + srv := &http.Server{Handler: mux} + go srv.Serve(ln) + e.t.Cleanup(func() { srv.Close() }) +} + +func serveStaticAsset(name string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !strings.HasSuffix(name, ".css") { + http.Error(w, "not found", http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "text/css") + f, err := staticAssets.Open(filepath.Join("assets", name)) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer f.Close() + io.Copy(w, f) + } +} + +func (e *Env) serveIndex(w http.ResponseWriter, r *http.Request) { + type indexData struct { + TestName string + TestStatus *TestStatus + Steps []*Step + Nodes []NodeStatus + } + + data := indexData{ + TestName: e.t.Name(), + TestStatus: e.testStatus, + Steps: e.Steps(), + } + for _, n := range e.nodes { + data.Nodes = append(data.Nodes, e.getNodeStatus(n.name)) + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := tmpl().ExecuteTemplate(w, "index.html", data); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } +} + +func (e *Env) serveWebSocket(w http.ResponseWriter, r *http.Request) { + conn, err := websocket.Accept(w, r, nil) + if err != nil { + return + } + defer conn.CloseNow() + wsCtx := conn.CloseRead(r.Context()) + + sub := e.eventBus.Subscribe() + defer sub.Close() + + for { + select { + case <-wsCtx.Done(): + return + case <-sub.Done(): + return + case ev := <-sub.Events(): + msg, err := conn.Writer(r.Context(), websocket.MessageText) + if err != nil { + return + } + if err := tmpl().ExecuteTemplate(msg, "event.html", ev); err != nil { + msg.Close() + return + } + if err := msg.Close(); err != nil { + return + } + } + } +} diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index f3c1108ed..efe2e1984 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -751,6 +751,10 @@ type Server struct { cloudInitData map[int]*CloudInitData // node num → cloud-init config fileContents map[string][]byte // filename → file bytes + + // onDHCPEvent, if non-nil, is called when DHCP messages are processed. + // Parameters are: source MAC, node number, DHCP message type, assigned IP. + onDHCPEvent func(nodeMAC MAC, nodeNum int, msgType layers.DHCPMsgType, assignedIP netip.Addr) } func (s *Server) logf(format string, args ...any) { @@ -765,6 +769,13 @@ func (s *Server) SetLoggerForTest(logf func(format string, args ...any)) { s.optLogf = logf } +// SetDHCPCallback registers a function to be called when DHCP messages are +// processed. The callback receives the source MAC, node number, DHCP message +// type (Discover, Offer, Request, Ack), and the assigned IP address. +func (s *Server) SetDHCPCallback(fn func(MAC, int, layers.DHCPMsgType, netip.Addr)) { + s.onDHCPEvent = fn +} + var derpMap = &tailcfg.DERPMap{ Regions: map[int]*tailcfg.DERPRegion{ 1: { @@ -1990,6 +2001,10 @@ func (s *Server) createDHCPResponse(request gopacket.Packet) ([]byte, error) { Length: 4, }, ) + if s.onDHCPEvent != nil { + s.onDHCPEvent(srcMAC, node.num, layers.DHCPMsgTypeDiscover, clientIP) + s.onDHCPEvent(srcMAC, node.num, layers.DHCPMsgTypeOffer, clientIP) + } case layers.DHCPMsgTypeRequest: response.Options = append(response.Options, layers.DHCPOption{ @@ -2018,6 +2033,10 @@ func (s *Server) createDHCPResponse(request gopacket.Packet) ([]byte, error) { Length: 4, }, ) + if s.onDHCPEvent != nil { + s.onDHCPEvent(srcMAC, node.num, layers.DHCPMsgTypeRequest, clientIP) + s.onDHCPEvent(srcMAC, node.num, layers.DHCPMsgTypeAck, clientIP) + } } eth := &layers.Ethernet{