talos/pkg/cluster/bootstrap.go
Andrey Smirnov 60d7360944 fix: ignore deadline exceeded errors on bootstrap
With the recent changes, the bootstrap API might wait for the time to be in
sync (as apid is launched before the time is synced). We set the timeout to
500ms for the bootstrap API call, so there's a chance that a call might
time out, and we should ignore it.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
2021-06-30 06:59:36 -07:00

90 lines
2.3 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster
import (
"context"
"errors"
"fmt"
"io"
"sort"
"strings"
"time"
"github.com/talos-systems/go-retry/retry"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
)
// APIBootstrapper bootstraps cluster via Talos API.
//
// It embeds ClientProvider and Info, so their methods are promoted onto
// APIBootstrapper. Bootstrap calls the promoted Client and NodesByType
// methods (presumably supplied by ClientProvider and Info respectively;
// both types are declared elsewhere in the package).
type APIBootstrapper struct {
ClientProvider
Info
}
// Bootstrap the cluster via the API.
//
// Bootstrap implements Bootstrapper interface.
//
// The lexicographically first control plane node is used as the bootstrap
// target. Bootstrap first polls the Version API on that node until it
// answers, then issues the Bootstrap API call, retrying on errors that are
// considered transient. Progress messages are written to out.
//
// An error is returned if the client can't be obtained, if there are no
// control plane nodes, if ctx is done, if one of the retry loops gives up,
// or if the Bootstrap call fails with an error that is not treated as
// transient.
func (s *APIBootstrapper) Bootstrap(ctx context.Context, out io.Writer) error {
	cli, err := s.Client()
	if err != nil {
		return err
	}

	controlPlaneNodes := s.NodesByType(machine.TypeControlPlane)
	if len(controlPlaneNodes) == 0 {
		return errors.New("no control plane nodes to bootstrap")
	}

	// Sort so that the choice of the bootstrap node is deterministic.
	sort.Strings(controlPlaneNodes)

	node := controlPlaneNodes[0]
	nodeCtx := client.WithNodes(ctx, node)

	fmt.Fprintln(out, "waiting for API")

	err = retry.Constant(5*time.Minute, retry.WithUnits(500*time.Millisecond)).Retry(func() error {
		// Once the caller's context is done every attempt is going to fail,
		// so stop retrying: return a plain (non-ExpectedError) error.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}

		retryCtx, cancel := context.WithTimeout(nodeCtx, 500*time.Millisecond)
		defer cancel()

		// Any failure here means the API is not ready yet, so keep polling.
		if _, versionErr := cli.Version(retryCtx); versionErr != nil {
			return retry.ExpectedError(versionErr)
		}

		return nil
	})
	if err != nil {
		return err
	}

	fmt.Fprintln(out, "bootstrapping cluster")

	return retry.Constant(backoff.DefaultConfig.MaxDelay, retry.WithUnits(100*time.Millisecond)).Retry(func() error {
		// Without this check an expired deadline on the caller's context
		// would be indistinguishable from the per-attempt timeout below and
		// would be retried until the retry loop itself gives up.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}

		retryCtx, cancel := context.WithTimeout(nodeCtx, 500*time.Millisecond)
		defer cancel()

		bootstrapErr := cli.Bootstrap(retryCtx, &machineapi.BootstrapRequest{})
		if bootstrapErr == nil {
			return nil
		}

		switch {
		// The per-attempt timeout expired (plain context error).
		case errors.Is(bootstrapErr, context.DeadlineExceeded):
			return retry.ExpectedError(bootstrapErr)
		// gRPC status codes which are treated as transient.
		case status.Code(bootstrapErr) == codes.FailedPrecondition || status.Code(bootstrapErr) == codes.DeadlineExceeded:
			return retry.ExpectedError(bootstrapErr)
		// The API endpoint is not accepting connections yet.
		case strings.Contains(bootstrapErr.Error(), "connection refused"):
			return retry.ExpectedError(bootstrapErr)
		}

		// Anything else is not retried.
		return bootstrapErr
	})
}