Brad Fitzpatrick 4cec06b8f2 tstest/natlab/vmtest: add macOS VM screenshot streaming to web UI
When --vmtest-web is set, Host.app is launched with --screenshot-port 0
to start a localhost HTTP server that captures the VZVirtualMachineView
display. The Go test harness parses the SCREENSHOT_PORT=<port> line from
stdout, then polls every 2 seconds for JPEG thumbnails and pushes them
over WebSocket to the web dashboard.

Clicking a screenshot thumbnail opens a full-resolution image proxied
through the web UI's /screenshot/{node} endpoint.

Screenshot events are excluded from the EventBus history (they're large
and only the latest matters, stored in NodeStatus.Screenshot).

Updates #13038

Change-Id: I9bc67ddd1cc72948b33c555d4be3d8db06a41f6d
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2026-04-29 07:48:26 -07:00

328 lines
8.2 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package vmtest
import (
"fmt"
"sync"
"time"
)
// StepStatus is the state of a declared test step.
type StepStatus int
const (
StepPending StepStatus = iota // not yet started
StepRunning // Begin called
StepDone // End(nil) called
StepFailed // End(non-nil) called
)
func (s StepStatus) String() string {
switch s {
case StepPending:
return "pending"
case StepRunning:
return "running"
case StepDone:
return "done"
case StepFailed:
return "failed"
}
return fmt.Sprintf("StepStatus(%d)", int(s))
}
// Icon returns a Unicode icon for the step status.
func (s StepStatus) Icon() string {
switch s {
case StepPending:
return "○"
case StepRunning:
return "◉"
case StepDone:
return "✓"
case StepFailed:
return "✗"
}
return "?"
}
// Step is a declared stage of a test, created by [Env.AddStep].
// The web UI shows all steps from the start, tracking their progress.
type Step struct {
mu sync.Mutex
name string
index int // 0-based position in Env.steps
env *Env
status StepStatus
err error
started time.Time
ended time.Time
}
// Name returns the step's display name.
func (s *Step) Name() string { return s.name }
// Index returns the step's 0-based position.
func (s *Step) Index() int { return s.index }
// Status returns the current status.
func (s *Step) Status() StepStatus {
s.mu.Lock()
defer s.mu.Unlock()
return s.status
}
// Err returns the error if the step failed, or nil.
func (s *Step) Err() error {
s.mu.Lock()
defer s.mu.Unlock()
return s.err
}
// Elapsed returns how long the step has been running (if running)
// or how long it took (if done/failed). Returns 0 if pending.
func (s *Step) Elapsed() time.Duration {
s.mu.Lock()
defer s.mu.Unlock()
if s.started.IsZero() {
return 0
}
if !s.ended.IsZero() {
return s.ended.Sub(s.started)
}
return time.Since(s.started)
}
// Begin marks the step as running. Publishes an event to the web UI.
func (s *Step) Begin() {
s.mu.Lock()
if s.status != StepPending {
s.mu.Unlock()
panic(fmt.Sprintf("Step %q: Begin called in state %s", s.name, s.status))
}
s.started = time.Now()
s.status = StepRunning
s.mu.Unlock()
s.env.publishStepChange(s)
}
// End marks the step as done (err == nil) or failed (err != nil).
// It publishes a status change event to the web UI.
// It does not call t.Fatalf; callers should handle the error as appropriate
// (return it from errgroup, call t.Fatalf on the test goroutine, etc).
func (s *Step) End(err error) {
s.mu.Lock()
if s.status != StepRunning {
s.mu.Unlock()
panic(fmt.Sprintf("Step %q: End called in state %s", s.name, s.status))
}
s.ended = time.Now()
if err != nil {
s.status = StepFailed
s.err = err
} else {
s.status = StepDone
}
s.mu.Unlock()
s.env.publishStepChange(s)
}
// EventType identifies the kind of event published to the EventBus.
type EventType string
const (
EventStepChanged EventType = "step_changed" // a Step changed status
EventConsoleOutput EventType = "console_output" // serial console line
EventDHCPDiscover EventType = "dhcp_discover" // VM sent DHCP Discover
EventDHCPOffer EventType = "dhcp_offer" // server sent DHCP Offer
EventDHCPRequest EventType = "dhcp_request" // VM sent DHCP Request
EventDHCPAck EventType = "dhcp_ack" // server sent DHCP Ack
EventScreenshot EventType = "screenshot" // VM display screenshot (JPEG, base64)
EventTailscale EventType = "tailscale" // Tailscale status change
EventTestStatus EventType = "test_status" // test Running/Passed/Failed
)
// TestStatus tracks whether the overall test is running, passed, or failed.
type TestStatus struct {
mu sync.Mutex
state string // "Running", "Passed", "Failed"
started time.Time
ended time.Time
}
func newTestStatus() *TestStatus {
return &TestStatus{state: "Running", started: time.Now()}
}
// State returns the current test state.
func (ts *TestStatus) State() string {
ts.mu.Lock()
defer ts.mu.Unlock()
return ts.state
}
// Elapsed returns total test duration.
func (ts *TestStatus) Elapsed() time.Duration {
ts.mu.Lock()
defer ts.mu.Unlock()
if !ts.ended.IsZero() {
return ts.ended.Sub(ts.started)
}
return time.Since(ts.started)
}
// StartUnixMilli returns the test start time as Unix milliseconds,
// for the client-side elapsed timer.
func (ts *TestStatus) StartUnixMilli() int64 {
ts.mu.Lock()
defer ts.mu.Unlock()
return ts.started.UnixMilli()
}
// finish marks the test as passed or failed.
func (ts *TestStatus) finish(failed bool) {
ts.mu.Lock()
defer ts.mu.Unlock()
ts.ended = time.Now()
if failed {
ts.state = "Failed"
} else {
ts.state = "Passed"
}
}
// VMEvent is a single event published to the [EventBus].
type VMEvent struct {
Time time.Time
NodeName string // "" for global events
Type EventType
Message string // human-readable description
Detail string // e.g. IP address, node key
Step *Step // non-nil for EventStepChanged
NIC int // NIC index for DHCP events (0-based); -1 if not applicable
}
// NICStatus is the DHCP state for one NIC on a node.
type NICStatus struct {
NetName string // human label like "192.168.1.0/24" or "10.0.0.0/24"
DHCP string // "waiting", "Discover sent", "Got 10.0.0.101", etc.
}
// NodeStatus tracks the current DHCP and Tailscale state of a VM node
// for rendering on the web UI's initial page load.
type NodeStatus struct {
Name string
OS string
NICs []NICStatus // one per NIC; index matches NIC index
JoinsTailnet bool // whether this node runs Tailscale
Tailscale string // "--", "Up (100.64.0.1)", etc.
Console []string // recent console output lines (ring buffer)
Screenshot string // latest screenshot as data URI, or ""
ScreenshotPort int // Host.app screenshot server port, or 0
}
const maxConsoleLines = 200
const (
eventBusHistorySize = 500
subscriberChannelSize = 1000
)
// EventBus broadcasts VMEvents to subscribers and keeps a history for
// late joiners. It is safe for concurrent use.
type EventBus struct {
mu sync.Mutex
history []VMEvent
subscribers map[*subscriber]struct{}
}
func newEventBus() *EventBus {
return &EventBus{
subscribers: make(map[*subscriber]struct{}),
}
}
// Publish sends an event to all subscribers and appends it to the history.
// Non-blocking: slow subscribers are skipped.
func (b *EventBus) Publish(ev VMEvent) {
if ev.Time.IsZero() {
ev.Time = time.Now()
}
b.mu.Lock()
defer b.mu.Unlock()
// Don't store screenshots in history — they're large and only the
// latest one matters (stored in NodeStatus.Screenshot instead).
if ev.Type != EventScreenshot {
b.history = append(b.history, ev)
}
if len(b.history) > eventBusHistorySize {
// Trim old events.
copy(b.history, b.history[len(b.history)-eventBusHistorySize:])
b.history = b.history[:eventBusHistorySize]
}
for sub := range b.subscribers {
select {
case sub.ch <- ev:
default:
// Slow consumer, skip.
}
}
}
// Subscribe returns a new subscriber that receives the event history
// followed by live events.
func (b *EventBus) Subscribe() *subscriber {
b.mu.Lock()
defer b.mu.Unlock()
sub := &subscriber{
bus: b,
ch: make(chan VMEvent, subscriberChannelSize),
done: make(chan struct{}),
}
// Send history.
for _, ev := range b.history {
select {
case sub.ch <- ev:
default:
}
}
b.subscribers[sub] = struct{}{}
return sub
}
func (b *EventBus) unsubscribe(sub *subscriber) {
b.mu.Lock()
defer b.mu.Unlock()
delete(b.subscribers, sub)
}
// subscriber receives events from an [EventBus].
type subscriber struct {
bus *EventBus
ch chan VMEvent
done chan struct{}
once sync.Once
}
// Events returns the channel of events. Closed when Close is called.
func (s *subscriber) Events() <-chan VMEvent {
return s.ch
}
// Close unsubscribes and closes the event channel.
func (s *subscriber) Close() {
s.once.Do(func() {
if s.bus != nil {
s.bus.unsubscribe(s)
}
close(s.done)
})
}
// Done returns a channel that's closed when Close is called.
func (s *subscriber) Done() <-chan struct{} {
return s.done
}