mirror of
https://github.com/juanfont/headscale.git
synced 2026-05-05 03:56:10 +02:00
integration: retry transient docker network ops
Libnetwork endpoint cleanup is eventually consistent. A back-to-back disconnect+connect on the same network can race teardown and return a transient error. Wrap the daemon calls in bounded exponential backoff so TestHASubnetRouterFailoverDockerDisconnect no longer flakes on phase 4c reconnect. Fixes #3234
This commit is contained in:
parent
3d5c0af4e7
commit
155e42f892
@ -1,11 +1,14 @@
|
||||
package dockertestutil
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff/v5"
|
||||
"github.com/juanfont/headscale/hscontrol/util"
|
||||
"github.com/ory/dockertest/v3"
|
||||
"github.com/ory/dockertest/v3/docker"
|
||||
@ -13,6 +16,20 @@ import (
|
||||
|
||||
var ErrContainerNotFound = errors.New("container not found")
|
||||
|
||||
// retryDockerOp absorbs eventual-consistency races in libnetwork endpoint cleanup.
|
||||
func retryDockerOp(ctx context.Context, op func() error) error {
|
||||
_, err := backoff.Retry(ctx, func() (struct{}, error) {
|
||||
err := op()
|
||||
if err != nil {
|
||||
return struct{}{}, err
|
||||
}
|
||||
|
||||
return struct{}{}, nil
|
||||
}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second))
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func GetFirstOrCreateNetwork(pool *dockertest.Pool, name string) (*dockertest.Network, error) {
|
||||
return GetFirstOrCreateNetworkWithSubnet(pool, name, "")
|
||||
}
|
||||
@ -72,13 +89,6 @@ func AddContainerToNetwork(
|
||||
return err
|
||||
}
|
||||
|
||||
err = pool.Client.ConnectNetwork(network.Network.ID, docker.NetworkConnectionOptions{
|
||||
Container: containers[0].ID,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO(kradalby): This doesn't work reliably, but calling the exact same functions
|
||||
// seem to work fine...
|
||||
// if container, ok := pool.ContainerByName("/" + testContainer); ok {
|
||||
@ -88,7 +98,11 @@ func AddContainerToNetwork(
|
||||
// }
|
||||
// }
|
||||
|
||||
return nil
|
||||
return retryDockerOp(context.Background(), func() error {
|
||||
return pool.Client.ConnectNetwork(network.Network.ID, docker.NetworkConnectionOptions{
|
||||
Container: containers[0].ID,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// DisconnectContainerFromNetwork removes the container from network at
|
||||
@ -115,9 +129,11 @@ func DisconnectContainerFromNetwork(
|
||||
return fmt.Errorf("%w: %s", ErrContainerNotFound, testContainer)
|
||||
}
|
||||
|
||||
return pool.Client.DisconnectNetwork(network.Network.ID, docker.NetworkConnectionOptions{
|
||||
Container: containers[0].ID,
|
||||
Force: true,
|
||||
return retryDockerOp(context.Background(), func() error {
|
||||
return pool.Client.DisconnectNetwork(network.Network.ID, docker.NetworkConnectionOptions{
|
||||
Container: containers[0].ID,
|
||||
Force: true,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
56
integration/dockertestutil/network_test.go
Normal file
56
integration/dockertestutil/network_test.go
Normal file
@ -0,0 +1,56 @@
|
||||
package dockertestutil
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
errTransientEndpointExists = errors.New("endpoint with name foo already exists in network bar")
|
||||
errPermanent = errors.New("permanent error")
|
||||
)
|
||||
|
||||
func TestRetryDockerOp_RecoversFromTransient(t *testing.T) {
|
||||
var attempts atomic.Int32
|
||||
|
||||
op := func() error {
|
||||
if attempts.Add(1) < 3 {
|
||||
return errTransientEndpointExists
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
err := retryDockerOp(context.Background(), op)
|
||||
if err != nil {
|
||||
t.Fatalf("retryDockerOp should recover from 2 transient errors, got: %v", err)
|
||||
}
|
||||
|
||||
if got := attempts.Load(); got != 3 {
|
||||
t.Fatalf("expected 3 attempts, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryDockerOp_RespectsContextCancellation(t *testing.T) {
|
||||
op := func() error {
|
||||
return errPermanent
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
err := retryDockerOp(ctx, op)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("retryDockerOp should fail when op always errors")
|
||||
}
|
||||
|
||||
if elapsed > 5*time.Second {
|
||||
t.Fatalf("retryDockerOp should honour ctx deadline (~200ms), took %s", elapsed)
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user