health: make it omittable

Saves 86 KB.

And stop depending on expvar and usermetrics when disabled,
in preparation for removing all the expvar/metrics/tsweb stuff.

Updates #12614

Change-Id: I35d2479ddd1d39b615bab32b1fa940ae8cbf9b11
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2025-10-03 11:05:37 -07:00 committed by Brad Fitzpatrick
parent 9c3aec58ba
commit 447cbdd1d0
17 changed files with 369 additions and 230 deletions

View File

@ -741,7 +741,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+ tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+
tailscale.com/logtail from tailscale.com/control/controlclient+ tailscale.com/logtail from tailscale.com/control/controlclient+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp tailscale.com/net/ace from tailscale.com/control/controlhttp
tailscale.com/net/bakedroots from tailscale.com/net/tlsdial+ tailscale.com/net/bakedroots from tailscale.com/net/tlsdial+
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock

View File

@ -112,7 +112,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/ipnstate from tailscale.com/client/local+
tailscale.com/kube/kubetypes from tailscale.com/envknob tailscale.com/kube/kubetypes from tailscale.com/envknob
tailscale.com/licenses from tailscale.com/client/web+ tailscale.com/licenses from tailscale.com/client/web+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/tsweb+
tailscale.com/net/ace from tailscale.com/cmd/tailscale/cli+ tailscale.com/net/ace from tailscale.com/cmd/tailscale/cli+
tailscale.com/net/bakedroots from tailscale.com/net/tlsdial tailscale.com/net/bakedroots from tailscale.com/net/tlsdial
tailscale.com/net/captivedetection from tailscale.com/net/netcheck tailscale.com/net/captivedetection from tailscale.com/net/netcheck

View File

@ -76,7 +76,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+ tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+
tailscale.com/logtail from tailscale.com/cmd/tailscaled+ tailscale.com/logtail from tailscale.com/cmd/tailscaled+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp tailscale.com/net/ace from tailscale.com/control/controlhttp
tailscale.com/net/bakedroots from tailscale.com/net/tlsdial tailscale.com/net/bakedroots from tailscale.com/net/tlsdial
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock
@ -179,7 +179,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/util/syspolicy/policyclient from tailscale.com/cmd/tailscaled+ tailscale.com/util/syspolicy/policyclient from tailscale.com/cmd/tailscaled+
tailscale.com/util/syspolicy/ptype from tailscale.com/ipn/ipnlocal+ tailscale.com/util/syspolicy/ptype from tailscale.com/ipn/ipnlocal+
tailscale.com/util/testenv from tailscale.com/control/controlclient+ tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/usermetric from tailscale.com/health+ tailscale.com/util/usermetric from tailscale.com/ipn/ipnlocal+
tailscale.com/util/vizerror from tailscale.com/tailcfg+ tailscale.com/util/vizerror from tailscale.com/tailcfg+
tailscale.com/util/winutil from tailscale.com/ipn/ipnauth tailscale.com/util/winutil from tailscale.com/ipn/ipnauth
tailscale.com/util/zstdframe from tailscale.com/control/controlclient tailscale.com/util/zstdframe from tailscale.com/control/controlclient
@ -324,7 +324,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
encoding/json from expvar+ encoding/json from expvar+
encoding/pem from crypto/tls+ encoding/pem from crypto/tls+
errors from bufio+ errors from bufio+
expvar from tailscale.com/health+ expvar from tailscale.com/metrics+
flag from tailscale.com/cmd/tailscaled+ flag from tailscale.com/cmd/tailscaled+
fmt from compress/flate+ fmt from compress/flate+
hash from crypto+ hash from crypto+

View File

@ -99,7 +99,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+ tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+
tailscale.com/logtail from tailscale.com/cmd/tailscaled+ tailscale.com/logtail from tailscale.com/cmd/tailscaled+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp+ tailscale.com/net/ace from tailscale.com/control/controlhttp+
tailscale.com/net/bakedroots from tailscale.com/net/tlsdial tailscale.com/net/bakedroots from tailscale.com/net/tlsdial
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock
@ -206,7 +206,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/util/syspolicy/policyclient from tailscale.com/cmd/tailscaled+ tailscale.com/util/syspolicy/policyclient from tailscale.com/cmd/tailscaled+
tailscale.com/util/syspolicy/ptype from tailscale.com/ipn/ipnlocal+ tailscale.com/util/syspolicy/ptype from tailscale.com/ipn/ipnlocal+
tailscale.com/util/testenv from tailscale.com/control/controlclient+ tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/usermetric from tailscale.com/health+ tailscale.com/util/usermetric from tailscale.com/ipn/ipnlocal+
tailscale.com/util/vizerror from tailscale.com/tailcfg+ tailscale.com/util/vizerror from tailscale.com/tailcfg+
tailscale.com/util/winutil from tailscale.com/ipn/ipnauth tailscale.com/util/winutil from tailscale.com/ipn/ipnauth
tailscale.com/util/zstdframe from tailscale.com/control/controlclient tailscale.com/util/zstdframe from tailscale.com/control/controlclient
@ -353,7 +353,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
encoding/json from expvar+ encoding/json from expvar+
encoding/pem from crypto/tls+ encoding/pem from crypto/tls+
errors from bufio+ errors from bufio+
expvar from tailscale.com/health+ expvar from tailscale.com/metrics+
flag from tailscale.com/cmd/tailscaled+ flag from tailscale.com/cmd/tailscaled+
fmt from compress/flate+ fmt from compress/flate+
hash from crypto+ hash from crypto+

View File

@ -321,7 +321,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+ tailscale.com/logpolicy from tailscale.com/cmd/tailscaled+
tailscale.com/logtail from tailscale.com/cmd/tailscaled+ tailscale.com/logtail from tailscale.com/cmd/tailscaled+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp tailscale.com/net/ace from tailscale.com/control/controlhttp
tailscale.com/net/bakedroots from tailscale.com/net/tlsdial+ tailscale.com/net/bakedroots from tailscale.com/net/tlsdial+
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock+ 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock+

View File

@ -169,7 +169,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+ tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+
tailscale.com/logtail from tailscale.com/control/controlclient+ tailscale.com/logtail from tailscale.com/control/controlclient+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp tailscale.com/net/ace from tailscale.com/control/controlhttp
tailscale.com/net/bakedroots from tailscale.com/ipn/ipnlocal+ tailscale.com/net/bakedroots from tailscale.com/ipn/ipnlocal+
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock

View File

@ -0,0 +1,13 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Code generated by gen.go; DO NOT EDIT.
//go:build ts_omit_health
package buildfeatures
// HasHealth is whether the binary was built with support for modular feature "Health checking support".
// Specifically, it's whether the binary was NOT built with the "ts_omit_health" build tag.
// It's a const so it can be used for dead code elimination.
const HasHealth = false

View File

@ -0,0 +1,13 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Code generated by gen.go; DO NOT EDIT.
//go:build !ts_omit_health
package buildfeatures
// HasHealth is whether the binary was built with support for modular feature "Health checking support".
// Specifically, it's whether the binary was NOT built with the "ts_omit_health" build tag.
// It's a const so it can be used for dead code elimination.
const HasHealth = true

View File

@ -150,6 +150,7 @@ var Features = map[FeatureTag]FeatureMeta{
Desc: "Generic Receive Offload support (performance)", Desc: "Generic Receive Offload support (performance)",
Deps: []FeatureTag{"netstack"}, Deps: []FeatureTag{"netstack"},
}, },
"health": {Sym: "Health", Desc: "Health checking support"},
"hujsonconf": {Sym: "HuJSONConf", Desc: "HuJSON config file support"}, "hujsonconf": {Sym: "HuJSONConf", Desc: "HuJSON config file support"},
"iptables": {Sym: "IPTables", Desc: "Linux iptables support"}, "iptables": {Sym: "IPTables", Desc: "Linux iptables support"},
"kube": {Sym: "Kube", Desc: "Kubernetes integration"}, "kube": {Sym: "Kube", Desc: "Kubernetes integration"},

View File

@ -8,7 +8,6 @@ package health
import ( import (
"context" "context"
"errors" "errors"
"expvar"
"fmt" "fmt"
"maps" "maps"
"net/http" "net/http"
@ -20,14 +19,13 @@ import (
"time" "time"
"tailscale.com/envknob" "tailscale.com/envknob"
"tailscale.com/metrics" "tailscale.com/feature/buildfeatures"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tstime" "tailscale.com/tstime"
"tailscale.com/types/opt" "tailscale.com/types/opt"
"tailscale.com/util/cibuild" "tailscale.com/util/cibuild"
"tailscale.com/util/eventbus" "tailscale.com/util/eventbus"
"tailscale.com/util/mak" "tailscale.com/util/mak"
"tailscale.com/util/usermetric"
"tailscale.com/version" "tailscale.com/version"
) )
@ -132,12 +130,15 @@ type Tracker struct {
lastLoginErr error lastLoginErr error
localLogConfigErr error localLogConfigErr error
tlsConnectionErrors map[string]error // map[ServerName]error tlsConnectionErrors map[string]error // map[ServerName]error
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel] metricHealthMessage any // nil or *metrics.MultiLabelMap[metricHealthMessageLabel]
} }
// NewTracker constructs a new [Tracker] and attaches the given eventbus. // NewTracker constructs a new [Tracker] and attaches the given eventbus.
// NewTracker will panic if no eventbus is given. // NewTracker will panic if no eventbus is given.
func NewTracker(bus *eventbus.Bus) *Tracker { func NewTracker(bus *eventbus.Bus) *Tracker {
if !buildfeatures.HasHealth {
return &Tracker{}
}
if bus == nil { if bus == nil {
panic("no eventbus set") panic("no eventbus set")
} }
@ -221,6 +222,9 @@ const legacyErrorArgKey = "LegacyError"
// temporarily (2024-06-14) while we migrate the old health infrastructure based // temporarily (2024-06-14) while we migrate the old health infrastructure based
// on Subsystems to the new Warnables architecture. // on Subsystems to the new Warnables architecture.
func (s Subsystem) Warnable() *Warnable { func (s Subsystem) Warnable() *Warnable {
if !buildfeatures.HasHealth {
return &noopWarnable
}
w, ok := subsystemsWarnables[s] w, ok := subsystemsWarnables[s]
if !ok { if !ok {
panic(fmt.Sprintf("health: no Warnable for Subsystem %q", s)) panic(fmt.Sprintf("health: no Warnable for Subsystem %q", s))
@ -230,10 +234,15 @@ func (s Subsystem) Warnable() *Warnable {
var registeredWarnables = map[WarnableCode]*Warnable{} var registeredWarnables = map[WarnableCode]*Warnable{}
var noopWarnable Warnable
// Register registers a new Warnable with the health package and returns it. // Register registers a new Warnable with the health package and returns it.
// Register panics if the Warnable was already registered, because Warnables // Register panics if the Warnable was already registered, because Warnables
// should be unique across the program. // should be unique across the program.
func Register(w *Warnable) *Warnable { func Register(w *Warnable) *Warnable {
if !buildfeatures.HasHealth {
return &noopWarnable
}
if registeredWarnables[w.Code] != nil { if registeredWarnables[w.Code] != nil {
panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code)) panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code))
} }
@ -245,6 +254,9 @@ func Register(w *Warnable) *Warnable {
// unregister removes a Warnable from the health package. It should only be used // unregister removes a Warnable from the health package. It should only be used
// for testing purposes. // for testing purposes.
func unregister(w *Warnable) { func unregister(w *Warnable) {
if !buildfeatures.HasHealth {
return
}
if registeredWarnables[w.Code] == nil { if registeredWarnables[w.Code] == nil {
panic(fmt.Sprintf("health: attempting to unregister Warnable %q that was not registered", w.Code)) panic(fmt.Sprintf("health: attempting to unregister Warnable %q that was not registered", w.Code))
} }
@ -317,6 +329,9 @@ func StaticMessage(s string) func(Args) string {
// some lost Tracker plumbing, we want to capture stack trace // some lost Tracker plumbing, we want to capture stack trace
// samples when it occurs. // samples when it occurs.
func (t *Tracker) nil() bool { func (t *Tracker) nil() bool {
if !buildfeatures.HasHealth {
return true
}
if t != nil { if t != nil {
return false return false
} }
@ -385,37 +400,10 @@ func (w *Warnable) IsVisible(ws *warningState, clockNow func() time.Time) bool {
return clockNow().Sub(ws.BrokenSince) >= w.TimeToVisible return clockNow().Sub(ws.BrokenSince) >= w.TimeToVisible
} }
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
// a usermetric.Registry and registers the metrics there.
func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
if reg == nil || t.metricHealthMessage != nil {
return
}
t.metricHealthMessage = usermetric.NewMultiLabelMapWithRegistry[metricHealthMessageLabel](
reg,
"tailscaled_health_messages",
"gauge",
"Number of health messages broken down by type.",
)
t.metricHealthMessage.Set(metricHealthMessageLabel{
Type: MetricLabelWarning,
}, expvar.Func(func() any {
if t.nil() {
return 0
}
t.mu.Lock()
defer t.mu.Unlock()
t.updateBuiltinWarnablesLocked()
return int64(len(t.stringsLocked()))
}))
}
// IsUnhealthy reports whether the current state is unhealthy because the given // IsUnhealthy reports whether the current state is unhealthy because the given
// warnable is set. // warnable is set.
func (t *Tracker) IsUnhealthy(w *Warnable) bool { func (t *Tracker) IsUnhealthy(w *Warnable) bool {
if t.nil() { if !buildfeatures.HasHealth || t.nil() {
return false return false
} }
t.mu.Lock() t.mu.Lock()
@ -429,7 +417,7 @@ func (t *Tracker) IsUnhealthy(w *Warnable) bool {
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is // SetUnhealthy takes ownership of args. The args can be nil if no additional information is
// needed for the unhealthy state. // needed for the unhealthy state.
func (t *Tracker) SetUnhealthy(w *Warnable, args Args) { func (t *Tracker) SetUnhealthy(w *Warnable, args Args) {
if t.nil() { if !buildfeatures.HasHealth || t.nil() {
return return
} }
t.mu.Lock() t.mu.Lock()
@ -438,7 +426,7 @@ func (t *Tracker) SetUnhealthy(w *Warnable, args Args) {
} }
func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) { func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
if w == nil { if !buildfeatures.HasHealth || w == nil {
return return
} }
@ -489,7 +477,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
// SetHealthy removes any warningState for the given Warnable. // SetHealthy removes any warningState for the given Warnable.
func (t *Tracker) SetHealthy(w *Warnable) { func (t *Tracker) SetHealthy(w *Warnable) {
if t.nil() { if !buildfeatures.HasHealth || t.nil() {
return return
} }
t.mu.Lock() t.mu.Lock()
@ -498,7 +486,7 @@ func (t *Tracker) SetHealthy(w *Warnable) {
} }
func (t *Tracker) setHealthyLocked(w *Warnable) { func (t *Tracker) setHealthyLocked(w *Warnable) {
if t.warnableVal[w] == nil { if !buildfeatures.HasHealth || t.warnableVal[w] == nil {
// Nothing to remove // Nothing to remove
return return
} }
@ -1009,7 +997,7 @@ func (t *Tracker) OverallError() error {
// each Warning to show a localized version of them instead. This function is // each Warning to show a localized version of them instead. This function is
// here for legacy compatibility purposes and is deprecated. // here for legacy compatibility purposes and is deprecated.
func (t *Tracker) Strings() []string { func (t *Tracker) Strings() []string {
if t.nil() { if !buildfeatures.HasHealth || t.nil() {
return nil return nil
} }
t.mu.Lock() t.mu.Lock()
@ -1018,6 +1006,9 @@ func (t *Tracker) Strings() []string {
} }
func (t *Tracker) stringsLocked() []string { func (t *Tracker) stringsLocked() []string {
if !buildfeatures.HasHealth {
return nil
}
result := []string{} result := []string{}
for w, ws := range t.warnableVal { for w, ws := range t.warnableVal {
if !w.IsVisible(ws, t.now) { if !w.IsVisible(ws, t.now) {
@ -1078,6 +1069,9 @@ var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
// updateBuiltinWarnablesLocked performs a number of checks on the state of the backend, // updateBuiltinWarnablesLocked performs a number of checks on the state of the backend,
// and adds/removes Warnings from the Tracker as needed. // and adds/removes Warnings from the Tracker as needed.
func (t *Tracker) updateBuiltinWarnablesLocked() { func (t *Tracker) updateBuiltinWarnablesLocked() {
if !buildfeatures.HasHealth {
return
}
t.updateWarmingUpWarnableLocked() t.updateWarmingUpWarnableLocked()
if w, show := t.showUpdateWarnable(); show { if w, show := t.showUpdateWarnable(); show {
@ -1316,11 +1310,17 @@ func (s *ReceiveFuncStats) Name() string {
} }
func (s *ReceiveFuncStats) Enter() { func (s *ReceiveFuncStats) Enter() {
if !buildfeatures.HasHealth {
return
}
s.numCalls.Add(1) s.numCalls.Add(1)
s.inCall.Store(true) s.inCall.Store(true)
} }
func (s *ReceiveFuncStats) Exit() { func (s *ReceiveFuncStats) Exit() {
if !buildfeatures.HasHealth {
return
}
s.inCall.Store(false) s.inCall.Store(false)
} }
@ -1329,7 +1329,7 @@ func (s *ReceiveFuncStats) Exit() {
// //
// If t is nil, it returns nil. // If t is nil, it returns nil.
func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats { func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
if t == nil { if !buildfeatures.HasHealth || t == nil {
return nil return nil
} }
t.initOnce.Do(t.doOnceInit) t.initOnce.Do(t.doOnceInit)
@ -1337,6 +1337,9 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
} }
func (t *Tracker) doOnceInit() { func (t *Tracker) doOnceInit() {
if !buildfeatures.HasHealth {
return
}
for i := range t.MagicSockReceiveFuncs { for i := range t.MagicSockReceiveFuncs {
f := &t.MagicSockReceiveFuncs[i] f := &t.MagicSockReceiveFuncs[i]
f.name = (ReceiveFunc(i)).String() f.name = (ReceiveFunc(i)).String()
@ -1385,10 +1388,3 @@ func (t *Tracker) LastNoiseDialWasRecent() bool {
t.lastNoiseDial = now t.lastNoiseDial = now
return dur < 2*time.Minute return dur < 2*time.Minute
} }
const MetricLabelWarning = "warning"
type metricHealthMessageLabel struct {
// TODO: break down by warnable.severity as well?
Type string
}

View File

@ -15,6 +15,7 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts" "github.com/google/go-cmp/cmp/cmpopts"
"tailscale.com/metrics"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tstest" "tailscale.com/tstest"
"tailscale.com/tstime" "tailscale.com/tstime"
@ -497,7 +498,11 @@ func TestHealthMetric(t *testing.T) {
tr.applyUpdates = tt.apply tr.applyUpdates = tt.apply
tr.latestVersion = tt.cv tr.latestVersion = tt.cv
tr.SetMetricsRegistry(&usermetric.Registry{}) tr.SetMetricsRegistry(&usermetric.Registry{})
if val := tr.metricHealthMessage.Get(metricHealthMessageLabel{Type: MetricLabelWarning}).String(); val != strconv.Itoa(tt.wantMetricCount) { m, ok := tr.metricHealthMessage.(*metrics.MultiLabelMap[metricHealthMessageLabel])
if !ok {
t.Fatal("metricHealthMessage has wrong type or is nil")
}
if val := m.Get(metricHealthMessageLabel{Type: MetricLabelWarning}).String(); val != strconv.Itoa(tt.wantMetricCount) {
t.Fatalf("metric value: %q, want: %q", val, strconv.Itoa(tt.wantMetricCount)) t.Fatalf("metric value: %q, want: %q", val, strconv.Itoa(tt.wantMetricCount))
} }
for _, w := range tr.CurrentState().Warnings { for _, w := range tr.CurrentState().Warnings {
@ -634,7 +639,11 @@ func TestControlHealth(t *testing.T) {
var r usermetric.Registry var r usermetric.Registry
ht.SetMetricsRegistry(&r) ht.SetMetricsRegistry(&r)
got := ht.metricHealthMessage.Get(metricHealthMessageLabel{ m, ok := ht.metricHealthMessage.(*metrics.MultiLabelMap[metricHealthMessageLabel])
if !ok {
t.Fatal("metricHealthMessage has wrong type or is nil")
}
got := m.Get(metricHealthMessageLabel{
Type: MetricLabelWarning, Type: MetricLabelWarning,
}).String() }).String()
want := strconv.Itoa( want := strconv.Itoa(

View File

@ -9,6 +9,7 @@ import (
"encoding/json" "encoding/json"
"time" "time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
) )
@ -120,7 +121,7 @@ func (w *Warnable) unhealthyState(ws *warningState) *UnhealthyState {
// The returned State is a snapshot of shared memory, and the caller should not // The returned State is a snapshot of shared memory, and the caller should not
// mutate the returned value. // mutate the returned value.
func (t *Tracker) CurrentState() *State { func (t *Tracker) CurrentState() *State {
if t.nil() { if !buildfeatures.HasHealth || t.nil() {
return &State{} return &State{}
} }

52
health/usermetrics.go Normal file
View File

@ -0,0 +1,52 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_health && !ts_omit_usermetrics
package health
import (
"expvar"
"tailscale.com/feature/buildfeatures"
"tailscale.com/util/usermetric"
)
// MetricLabelWarning is the Type label value under which
// SetMetricsRegistry publishes the current count of health messages.
const MetricLabelWarning = "warning"
// metricHealthMessageLabel is the label set used to key the
// "tailscaled_health_messages" multi-label metric map.
type metricHealthMessageLabel struct {
// TODO: break down by warnable.severity as well?
Type string
}
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
// a usermetric.Registry and registers the metrics there.
//
// It creates the "tailscaled_health_messages" gauge in reg and stores the
// resulting map in t.metricHealthMessage. The gauge's MetricLabelWarning
// entry is an expvar.Func, so its value is recomputed every time the metric
// is read: it refreshes the built-in warnables and reports how many strings
// stringsLocked returns.
//
// SetMetricsRegistry does nothing if reg is nil or if t.metricHealthMessage
// has already been set.
func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
	if !buildfeatures.HasHealth {
		return
	}
	if reg == nil || t.metricHealthMessage != nil {
		return
	}

	m := usermetric.NewMultiLabelMapWithRegistry[metricHealthMessageLabel](
		reg,
		"tailscaled_health_messages",
		"gauge",
		"Number of health messages broken down by type.",
	)

	m.Set(metricHealthMessageLabel{
		Type: MetricLabelWarning,
	}, expvar.Func(func() any {
		if t.nil() {
			// Return the same dynamic type (int64) as the normal path
			// below, so the gauge's value type does not depend on
			// which branch ran.
			return int64(0)
		}
		t.mu.Lock()
		defer t.mu.Unlock()
		t.updateBuiltinWarnablesLocked()
		return int64(len(t.stringsLocked()))
	}))

	// Publish the map on the Tracker only once it is fully populated.
	t.metricHealthMessage = m
}

View File

@ -0,0 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ts_omit_health || ts_omit_usermetrics
package health
// SetMetricsRegistry is a no-op in this build: this file is only compiled
// when the ts_omit_health or ts_omit_usermetrics build tag is set. The
// argument is ignored; it is typed as any, so this file needs no imports.
func (t *Tracker) SetMetricsRegistry(_ any) {
}

View File

@ -8,15 +8,24 @@ import (
"runtime" "runtime"
"time" "time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/version" "tailscale.com/version"
) )
// condRegister builds and registers one of this package's built-in
// Warnables, unless health support is compiled out.
//
// The Warnable is produced by f rather than passed in directly so that, when
// buildfeatures.HasHealth is false, f is never called and condRegister
// returns nil without constructing anything.
//
// When health support is present, the Warnable returned by f is passed
// through Register, so built-in Warnables remain subject to Register's
// duplicate-code check, as they were when they were declared with Register
// directly.
func condRegister(f func() *Warnable) *Warnable {
	if !buildfeatures.HasHealth {
		return nil
	}
	return Register(f())
}
/** /**
This file contains definitions for the Warnables maintained within this `health` package. This file contains definitions for the Warnables maintained within this `health` package.
*/ */
// updateAvailableWarnable is a Warnable that warns the user that an update is available. // updateAvailableWarnable is a Warnable that warns the user that an update is available.
var updateAvailableWarnable = Register(&Warnable{ var updateAvailableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "update-available", Code: "update-available",
Title: "Update available", Title: "Update available",
Severity: SeverityLow, Severity: SeverityLow,
@ -27,10 +36,12 @@ var updateAvailableWarnable = Register(&Warnable{
return fmt.Sprintf("An update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion]) return fmt.Sprintf("An update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} }
}, },
}
}) })
// securityUpdateAvailableWarnable is a Warnable that warns the user that an important security update is available. // securityUpdateAvailableWarnable is a Warnable that warns the user that an important security update is available.
var securityUpdateAvailableWarnable = Register(&Warnable{ var securityUpdateAvailableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "security-update-available", Code: "security-update-available",
Title: "Security update available", Title: "Security update available",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -41,48 +52,58 @@ var securityUpdateAvailableWarnable = Register(&Warnable{
return fmt.Sprintf("A security update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion]) return fmt.Sprintf("A security update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} }
}, },
}
}) })
// unstableWarnable is a Warnable that warns the user that they are using an unstable version of Tailscale // unstableWarnable is a Warnable that warns the user that they are using an unstable version of Tailscale
// so they won't be surprised by all the issues that may arise. // so they won't be surprised by all the issues that may arise.
var unstableWarnable = Register(&Warnable{ var unstableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "is-using-unstable-version", Code: "is-using-unstable-version",
Title: "Using an unstable version", Title: "Using an unstable version",
Severity: SeverityLow, Severity: SeverityLow,
Text: StaticMessage("This is an unstable version of Tailscale meant for testing and development purposes. Please report any issues to Tailscale."), Text: StaticMessage("This is an unstable version of Tailscale meant for testing and development purposes. Please report any issues to Tailscale."),
}
}) })
// NetworkStatusWarnable is a Warnable that warns the user that the network is down. // NetworkStatusWarnable is a Warnable that warns the user that the network is down.
var NetworkStatusWarnable = Register(&Warnable{ var NetworkStatusWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "network-status", Code: "network-status",
Title: "Network down", Title: "Network down",
Severity: SeverityMedium, Severity: SeverityMedium,
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."), Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
ImpactsConnectivity: true, ImpactsConnectivity: true,
TimeToVisible: 5 * time.Second, TimeToVisible: 5 * time.Second,
}
}) })
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped. // IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
var IPNStateWarnable = Register(&Warnable{ var IPNStateWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "wantrunning-false", Code: "wantrunning-false",
Title: "Tailscale off", Title: "Tailscale off",
Severity: SeverityLow, Severity: SeverityLow,
Text: StaticMessage("Tailscale is stopped."), Text: StaticMessage("Tailscale is stopped."),
}
}) })
// localLogWarnable is a Warnable that warns the user that the local log is misconfigured. // localLogWarnable is a Warnable that warns the user that the local log is misconfigured.
var localLogWarnable = Register(&Warnable{ var localLogWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "local-log-config-error", Code: "local-log-config-error",
Title: "Local log misconfiguration", Title: "Local log misconfiguration",
Severity: SeverityLow, Severity: SeverityLow,
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("The local log is misconfigured: %v", args[ArgError]) return fmt.Sprintf("The local log is misconfigured: %v", args[ArgError])
}, },
}
}) })
// LoginStateWarnable is a Warnable that warns the user that they are logged out, // LoginStateWarnable is a Warnable that warns the user that they are logged out,
// and provides the last login error if available. // and provides the last login error if available.
var LoginStateWarnable = Register(&Warnable{ var LoginStateWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "login-state", Code: "login-state",
Title: "Logged out", Title: "Logged out",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -94,10 +115,12 @@ var LoginStateWarnable = Register(&Warnable{
} }
}, },
DependsOn: []*Warnable{IPNStateWarnable}, DependsOn: []*Warnable{IPNStateWarnable},
}
}) })
// notInMapPollWarnable is a Warnable that warns the user that we are using a stale network map. // notInMapPollWarnable is a Warnable that warns the user that we are using a stale network map.
var notInMapPollWarnable = Register(&Warnable{ var notInMapPollWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "not-in-map-poll", Code: "not-in-map-poll",
Title: "Out of sync", Title: "Out of sync",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -105,10 +128,12 @@ var notInMapPollWarnable = Register(&Warnable{
Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."), Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
// 8 minutes reflects a maximum maintenance window for the coordination server. // 8 minutes reflects a maximum maintenance window for the coordination server.
TimeToVisible: 8 * time.Minute, TimeToVisible: 8 * time.Minute,
}
}) })
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP. // noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
var noDERPHomeWarnable = Register(&Warnable{ var noDERPHomeWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "no-derp-home", Code: "no-derp-home",
Title: "No home relay server", Title: "No home relay server",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -116,10 +141,12 @@ var noDERPHomeWarnable = Register(&Warnable{
Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."), Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
ImpactsConnectivity: true, ImpactsConnectivity: true,
TimeToVisible: 10 * time.Second, TimeToVisible: 10 * time.Second,
}
}) })
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server. // noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
var noDERPConnectionWarnable = Register(&Warnable{ var noDERPConnectionWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "no-derp-connection", Code: "no-derp-connection",
Title: "Relay server unavailable", Title: "Relay server unavailable",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -143,11 +170,13 @@ var noDERPConnectionWarnable = Register(&Warnable{
}, },
ImpactsConnectivity: true, ImpactsConnectivity: true,
TimeToVisible: 10 * time.Second, TimeToVisible: 10 * time.Second,
}
}) })
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't // derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't
// heard from the home DERP region for a while. // heard from the home DERP region for a while.
var derpTimeoutWarnable = Register(&Warnable{ var derpTimeoutWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "derp-timed-out", Code: "derp-timed-out",
Title: "Relay server timed out", Title: "Relay server timed out",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -163,10 +192,12 @@ var derpTimeoutWarnable = Register(&Warnable{
return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region ID '%v') in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgDERPRegionID], args[ArgDuration]) return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region ID '%v') in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgDERPRegionID], args[ArgDuration])
} }
}, },
}
}) })
// derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue. // derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue.
var derpRegionErrorWarnable = Register(&Warnable{ var derpRegionErrorWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "derp-region-error", Code: "derp-region-error",
Title: "Relay server error", Title: "Relay server error",
Severity: SeverityLow, Severity: SeverityLow,
@ -174,20 +205,24 @@ var derpRegionErrorWarnable = Register(&Warnable{
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgDERPRegionID], args[ArgError]) return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgDERPRegionID], args[ArgError])
}, },
}
}) })
// noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections. // noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections.
var noUDP4BindWarnable = Register(&Warnable{ var noUDP4BindWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "no-udp4-bind", Code: "no-udp4-bind",
Title: "NAT traversal setup failure", Title: "NAT traversal setup failure",
Severity: SeverityMedium, Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable}, DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: StaticMessage("Tailscale couldn't listen for incoming UDP connections."), Text: StaticMessage("Tailscale couldn't listen for incoming UDP connections."),
ImpactsConnectivity: true, ImpactsConnectivity: true,
}
}) })
// mapResponseTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't received a network map from the coordination server in a while. // mapResponseTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't received a network map from the coordination server in a while.
var mapResponseTimeoutWarnable = Register(&Warnable{ var mapResponseTimeoutWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "mapresponse-timeout", Code: "mapresponse-timeout",
Title: "Network map response timeout", Title: "Network map response timeout",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -195,10 +230,12 @@ var mapResponseTimeoutWarnable = Register(&Warnable{
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration]) return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration])
}, },
}
}) })
// tlsConnectionFailedWarnable is a Warnable that warns the user that Tailscale could not establish an encrypted connection with a server. // tlsConnectionFailedWarnable is a Warnable that warns the user that Tailscale could not establish an encrypted connection with a server.
var tlsConnectionFailedWarnable = Register(&Warnable{ var tlsConnectionFailedWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "tls-connection-failed", Code: "tls-connection-failed",
Title: "Encrypted connection failed", Title: "Encrypted connection failed",
Severity: SeverityMedium, Severity: SeverityMedium,
@ -206,36 +243,43 @@ var tlsConnectionFailedWarnable = Register(&Warnable{
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("Tailscale could not establish an encrypted connection with '%q': %v", args[ArgServerName], args[ArgError]) return fmt.Sprintf("Tailscale could not establish an encrypted connection with '%q': %v", args[ArgServerName], args[ArgError])
}, },
}
}) })
// magicsockReceiveFuncWarnable is a Warnable that warns the user that one of the Magicsock functions is not running. // magicsockReceiveFuncWarnable is a Warnable that warns the user that one of the Magicsock functions is not running.
var magicsockReceiveFuncWarnable = Register(&Warnable{ var magicsockReceiveFuncWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "magicsock-receive-func-error", Code: "magicsock-receive-func-error",
Title: "MagicSock function not running", Title: "MagicSock function not running",
Severity: SeverityMedium, Severity: SeverityMedium,
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("The MagicSock function %s is not running. You might experience connectivity issues.", args[ArgMagicsockFunctionName]) return fmt.Sprintf("The MagicSock function %s is not running. You might experience connectivity issues.", args[ArgMagicsockFunctionName])
}, },
}
}) })
// testWarnable is a Warnable that is used within this package for testing purposes only. // testWarnable is a Warnable that is used within this package for testing purposes only.
var testWarnable = Register(&Warnable{ var testWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "test-warnable", Code: "test-warnable",
Title: "Test warnable", Title: "Test warnable",
Severity: SeverityLow, Severity: SeverityLow,
Text: func(args Args) string { Text: func(args Args) string {
return args[ArgError] return args[ArgError]
}, },
}
}) })
// applyDiskConfigWarnable is a Warnable that warns the user that there was an error applying the envknob config stored on disk. // applyDiskConfigWarnable is a Warnable that warns the user that there was an error applying the envknob config stored on disk.
var applyDiskConfigWarnable = Register(&Warnable{ var applyDiskConfigWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "apply-disk-config", Code: "apply-disk-config",
Title: "Could not apply configuration", Title: "Could not apply configuration",
Severity: SeverityMedium, Severity: SeverityMedium,
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("An error occurred applying the Tailscale envknob configuration stored on disk: %v", args[ArgError]) return fmt.Sprintf("An error occurred applying the Tailscale envknob configuration stored on disk: %v", args[ArgError])
}, },
}
}) })
// warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user // warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user
@ -245,9 +289,11 @@ const warmingUpWarnableDuration = 5 * time.Second
// warmingUpWarnable is a Warnable that is reported by the backend when it is starting up, for a maximum time of // warmingUpWarnable is a Warnable that is reported by the backend when it is starting up, for a maximum time of
// warmingUpWarnableDuration. The GUIs use the presence of this Warnable to prevent showing any other warnings until // warmingUpWarnableDuration. The GUIs use the presence of this Warnable to prevent showing any other warnings until
// the backend is fully started. // the backend is fully started.
var warmingUpWarnable = Register(&Warnable{ var warmingUpWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: "warming-up", Code: "warming-up",
Title: "Tailscale is starting", Title: "Tailscale is starting",
Severity: SeverityLow, Severity: SeverityLow,
Text: StaticMessage("Tailscale is starting. Please wait."), Text: StaticMessage("Tailscale is starting. Please wait."),
}
}) })

View File

@ -165,7 +165,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+ tailscale.com/logpolicy from tailscale.com/ipn/ipnlocal+
tailscale.com/logtail from tailscale.com/control/controlclient+ tailscale.com/logtail from tailscale.com/control/controlclient+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/health+ tailscale.com/metrics from tailscale.com/net/tstun+
tailscale.com/net/ace from tailscale.com/control/controlhttp tailscale.com/net/ace from tailscale.com/control/controlhttp
tailscale.com/net/bakedroots from tailscale.com/ipn/ipnlocal+ tailscale.com/net/bakedroots from tailscale.com/ipn/ipnlocal+
💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock 💣 tailscale.com/net/batching from tailscale.com/wgengine/magicsock

View File

@ -1710,7 +1710,7 @@ func (c *Conn) mkReceiveFunc(ruc *RebindingUDPConn, healthItem *health.ReceiveFu
var epCache epAddrEndpointCache var epCache epAddrEndpointCache
return func(buffs [][]byte, sizes []int, eps []conn.Endpoint) (_ int, retErr error) { return func(buffs [][]byte, sizes []int, eps []conn.Endpoint) (_ int, retErr error) {
if healthItem != nil { if buildfeatures.HasHealth && healthItem != nil {
healthItem.Enter() healthItem.Enter()
defer healthItem.Exit() defer healthItem.Exit()
defer func() { defer func() {