mirror of
https://github.com/juanfont/headscale.git
synced 2026-05-04 19:46:12 +02:00
state: per-goroutine deadline in HA probe cycle
`time.After(ProbeTimeout)` returned a single channel shared by every probe goroutine in the cycle. Because that channel delivers exactly one value, only the first goroutine to receive the deadline tick drains it; any other goroutine still waiting on its `responseCh` is then stuck forever, `wg.Wait()` never returns, and the scheduler loop in `app.go` stalls on the next tick. The condition fires whenever two or more nodes time out in the same cycle — common under cable-pull, where IsOnline lags reality and both routers stay in the candidate set as half-open TCP connections. Move the timer inside each goroutine so every probe has its own deadline. Updates #3234
This commit is contained in:
parent
d1443a431c
commit
94ec607bca
@ -71,8 +71,6 @@ func (p *HAHealthProber) ProbeOnce(
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
deadline := time.After(p.cfg.ProbeTimeout)
|
||||
|
||||
for _, id := range nodeIDs {
|
||||
if !p.isConnected(id) {
|
||||
log.Debug().
|
||||
@ -90,6 +88,9 @@ func (p *HAHealthProber) ProbeOnce(
|
||||
}))
|
||||
|
||||
wg.Go(func() {
|
||||
timer := time.NewTimer(p.cfg.ProbeTimeout)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case latency := <-responseCh:
|
||||
log.Debug().
|
||||
@ -105,7 +106,7 @@ func (p *HAHealthProber) ProbeOnce(
|
||||
Msg("HA probe: node recovered, recalculating primaries")
|
||||
}
|
||||
|
||||
case <-deadline:
|
||||
case <-timer.C:
|
||||
p.state.CancelPing(pingID)
|
||||
|
||||
if !p.isConnected(id) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user