mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 16:21:58 +01:00 
			
		
		
		
	Add exponential backoff to healthcheck (#3643)
Move exponential backoff initialization to Start()
Signed-off-by: RickyRajinder <singh.sangh@gmail.com>
Move comment
Increase max interval and update README
Remove trailing whitespace
Change Start() param name back to interval
This commit is contained in:
		
							parent
							
								
									22cd28a798
								
							
						
					
					
						commit
						efbe4ac5e8
					
				
							
								
								
									
										1
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								go.mod
									
									
									
									
									
								
							| @ -12,6 +12,7 @@ require ( | |||||||
| 	github.com/apache/thrift v0.13.0 // indirect | 	github.com/apache/thrift v0.13.0 // indirect | ||||||
| 	github.com/aws/aws-sdk-go v1.28.9 | 	github.com/aws/aws-sdk-go v1.28.9 | ||||||
| 	github.com/caddyserver/caddy v1.0.4 | 	github.com/caddyserver/caddy v1.0.4 | ||||||
|  | 	github.com/cenkalti/backoff/v4 v4.0.0 | ||||||
| 	github.com/coredns/federation v0.0.0-20190818181423-e032b096babe | 	github.com/coredns/federation v0.0.0-20190818181423-e032b096babe | ||||||
| 	github.com/coreos/go-systemd v0.0.0-20190212144455-93d5ec2c7f76 // indirect | 	github.com/coreos/go-systemd v0.0.0-20190212144455-93d5ec2c7f76 // indirect | ||||||
| 	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect | 	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect | ||||||
|  | |||||||
							
								
								
									
										2
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								go.sum
									
									
									
									
									
								
							| @ -78,6 +78,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB | |||||||
| github.com/caddyserver/caddy v1.0.4 h1:wwuGSkUHo6RZ3oMpeTt7J09WBB87X5o+IZN4dKehcQE= | github.com/caddyserver/caddy v1.0.4 h1:wwuGSkUHo6RZ3oMpeTt7J09WBB87X5o+IZN4dKehcQE= | ||||||
| github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM= | github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM= | ||||||
| github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= | github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= | ||||||
|  | github.com/cenkalti/backoff/v4 v4.0.0 h1:6VeaLF9aI+MAUQ95106HwWzYZgJJpZ4stumjj6RFYAU= | ||||||
|  | github.com/cenkalti/backoff/v4 v4.0.0/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= | ||||||
| github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= | github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= | ||||||
| github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= | ||||||
| github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= | github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= | ||||||
|  | |||||||
| @ -9,8 +9,10 @@ | |||||||
| The *forward* plugin re-uses already opened sockets to the upstreams. It supports UDP, TCP and | The *forward* plugin re-uses already opened sockets to the upstreams. It supports UDP, TCP and | ||||||
| DNS-over-TLS and uses in band health checking. | DNS-over-TLS and uses in band health checking. | ||||||
| 
 | 
 | ||||||
| When it detects an error a health check is performed. This check runs in a loop, every *0.5s*, for | When it detects an error a health check is performed. This check runs in a loop, | ||||||
| as long as the upstream reports unhealthy. Once healthy we stop health checking (until the next | starting with a *0.5s* interval and exponentially backing off with randomized intervals | ||||||
|  | up to *60s* for as long as the upstream reports unhealthy. The exponential backoff | ||||||
|  | will reset to *0.5s* after 15 minutes. Once healthy we stop health checking (until the next | ||||||
| error). The health checks use a recursive DNS query (`. IN NS`) to get upstream health. Any response | error). The health checks use a recursive DNS query (`. IN NS`) to get upstream health. Any response | ||||||
| that is not a network error (REFUSED, NOTIMPL, SERVFAIL, etc) is taken as a healthy upstream. The | that is not a network error (REFUSED, NOTIMPL, SERVFAIL, etc) is taken as a healthy upstream. The | ||||||
| health check uses the same protocol as specified in **TO**. If `max_fails` is set to 0, no checking | health check uses the same protocol as specified in **TO**. If `max_fails` is set to 0, no checking | ||||||
|  | |||||||
| @ -5,6 +5,8 @@ package up | |||||||
| import ( | import ( | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"time" | 	"time" | ||||||
|  | 
 | ||||||
|  | 	"github.com/cenkalti/backoff/v4" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| // Probe is used to run a single Func until it returns true (indicating a target is healthy). If a Func | // Probe is used to run a single Func until it returns true (indicating a target is healthy). If a Func | ||||||
| @ -13,8 +15,7 @@ import ( | |||||||
| type Probe struct { | type Probe struct { | ||||||
| 	sync.Mutex | 	sync.Mutex | ||||||
| 	inprogress int | 	inprogress int | ||||||
| 	interval   time.Duration | 	expBackoff backoff.ExponentialBackOff | ||||||
| 	max        time.Duration |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Func is used to determine if a target is alive. If so this function must return nil. | // Func is used to determine if a target is alive. If so this function must return nil. | ||||||
| @ -31,7 +32,13 @@ func (p *Probe) Do(f Func) { | |||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 	p.inprogress = active | 	p.inprogress = active | ||||||
| 	interval := p.interval | 	interval := p.expBackoff.NextBackOff() | ||||||
|  | 	// If exponential backoff has reached the maximum elapsed time (15 minutes), | ||||||
|  | 	// reset it and try again | ||||||
|  | 	if interval == -1 { | ||||||
|  | 		p.expBackoff.Reset() | ||||||
|  | 		interval = p.expBackoff.NextBackOff() | ||||||
|  | 	} | ||||||
| 	p.Unlock() | 	p.Unlock() | ||||||
| 	// Passed the lock. Now run f for as long it returns false. If a true is returned | 	// Passed the lock. Now run f for as long it returns false. If a true is returned | ||||||
| 	// we return from the goroutine and we can accept another Func to run. | 	// we return from the goroutine and we can accept another Func to run. | ||||||
| @ -42,9 +49,6 @@ func (p *Probe) Do(f Func) { | |||||||
| 				break | 				break | ||||||
| 			} | 			} | ||||||
| 			time.Sleep(interval) | 			time.Sleep(interval) | ||||||
| 			if i%2 == 0 && i < 4 { // 4 is 2 doubles, so no need to increase anymore - this is *also* checked in double() |  | ||||||
| 				p.double() |  | ||||||
| 			} |  | ||||||
| 			p.Lock() | 			p.Lock() | ||||||
| 			if p.inprogress == stop { | 			if p.inprogress == stop { | ||||||
| 				p.Unlock() | 				p.Unlock() | ||||||
| @ -60,15 +64,6 @@ func (p *Probe) Do(f Func) { | |||||||
| 	}() | 	}() | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (p *Probe) double() { |  | ||||||
| 	p.Lock() |  | ||||||
| 	p.interval *= 2 |  | ||||||
| 	if p.interval > p.max { |  | ||||||
| 		p.interval = p.max |  | ||||||
| 	} |  | ||||||
| 	p.Unlock() |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Stop stops the probing. | // Stop stops the probing. | ||||||
| func (p *Probe) Stop() { | func (p *Probe) Stop() { | ||||||
| 	p.Lock() | 	p.Lock() | ||||||
| @ -77,10 +72,20 @@ func (p *Probe) Stop() { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Start will initialize the probe manager, after which probes can be initiated with Do. | // Start will initialize the probe manager, after which probes can be initiated with Do. | ||||||
|  | // Initializes exponential backoff using the given interval duration | ||||||
| func (p *Probe) Start(interval time.Duration) { | func (p *Probe) Start(interval time.Duration) { | ||||||
| 	p.Lock() | 	p.Lock() | ||||||
| 	p.interval = interval | 	eB := &backoff.ExponentialBackOff{ | ||||||
| 	p.max = interval * multiplier | 		InitialInterval:     interval, | ||||||
|  | 		RandomizationFactor: backoff.DefaultRandomizationFactor, | ||||||
|  | 		Multiplier:          backoff.DefaultMultiplier, | ||||||
|  | 		MaxInterval:         backoff.DefaultMaxInterval, | ||||||
|  | 		MaxElapsedTime:      backoff.DefaultMaxElapsedTime, | ||||||
|  | 		Stop:                backoff.Stop, | ||||||
|  | 		Clock:               backoff.SystemClock, | ||||||
|  | 	} | ||||||
|  | 	p.expBackoff = *eB | ||||||
|  | 	p.expBackoff.Reset() | ||||||
| 	p.Unlock() | 	p.Unlock() | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -88,6 +93,4 @@ const ( | |||||||
| 	idle = iota | 	idle = iota | ||||||
| 	active | 	active | ||||||
| 	stop | 	stop | ||||||
| 
 |  | ||||||
| 	multiplier = 4 |  | ||||||
| ) | ) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user