diff --git a/internal/pkg/etcd/etcd.go b/internal/pkg/etcd/etcd.go index ed69ba9a0..ca4d2e191 100644 --- a/internal/pkg/etcd/etcd.go +++ b/internal/pkg/etcd/etcd.go @@ -14,6 +14,7 @@ import ( "time" "github.com/cosi-project/runtime/pkg/state" + "github.com/siderolabs/go-retry/retry" "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" "go.etcd.io/etcd/client/pkg/v3/transport" @@ -155,7 +156,19 @@ func (c *Client) LeaveCluster(ctx context.Context, st state.State) error { return err } - if err := c.RemoveMemberByMemberID(ctx, memberID); err != nil { + if err := retry.Constant(5*time.Minute, retry.WithUnits(10*time.Second)).RetryWithContext(ctx, func(ctx context.Context) error { + err := c.RemoveMemberByMemberID(ctx, memberID) + if err == nil { + return nil + } + + if errors.Is(err, rpctypes.ErrUnhealthy) { + // unhealthy is returned when the member hasn't established connections with a quorum of other members + return retry.ExpectedError(err) + } + + return err + }); err != nil { return err } diff --git a/internal/pkg/etcd/local.go b/internal/pkg/etcd/local.go index a88d8a8d3..ee6162954 100644 --- a/internal/pkg/etcd/local.go +++ b/internal/pkg/etcd/local.go @@ -7,6 +7,7 @@ package etcd import ( "context" "fmt" + "time" "github.com/cosi-project/runtime/pkg/safe" "github.com/cosi-project/runtime/pkg/state" @@ -16,10 +17,14 @@ import ( // GetLocalMemberID gets the etcd member id of the local node via resources. func GetLocalMemberID(ctx context.Context, s state.State) (uint64, error) { - member, err := safe.ReaderGet[*etcd.Member]( + ctx, cancel := context.WithTimeout(ctx, 3*time.Minute) + defer cancel() + + member, err := safe.StateWatchFor[*etcd.Member]( ctx, s, etcd.NewMember(etcd.NamespaceName, etcd.LocalMemberID).Metadata(), + state.WithEventTypes(state.Created), ) if err != nil { return 0, fmt.Errorf("failed to get local etcd member ID: %w", err)