mirror of https://github.com/siderolabs/talos.git
Handling of multiple endpoints was already implemented in #2094. This PR enables the round-robin policy so that gRPC picks a new endpoint for each call instead of sending every request to the first control plane node. The endpoint list is randomized to handle the case when only a single request is sent, so that it doesn't always go to the first node in the list. gRPC handles dead/unresponsive nodes automatically for us. `talosctl cluster create` and the provision tests were switched to use the client-side load balancer for the Talos API. Additional improvements we got:

* `talosctl` now reports the correct node IP when running commands without `-n`, not the load balancer IP (if using multiple endpoints, of course)
* the load balancer can't provide reliable error handling when an upstream server is unresponsive or there are no upstreams available; gRPC returns much more helpful errors

Fixes #1641

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
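For illustration, here is a minimal sketch (not the implementation from this PR) of how a gRPC client in Go can be pointed at several control plane endpoints with the round_robin balancing policy and a shuffled endpoint list. The function name dialRoundRobin and the "talos" resolver scheme are made up for the example; transport credentials are expected to be supplied by the caller via opts.

package example

import (
	"context"
	"math/rand"

	"google.golang.org/grpc"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/resolver/manual"
)

// dialRoundRobin (hypothetical helper) opens a single gRPC connection that
// spreads calls across all given endpoints. The endpoint list is shuffled
// first so that a client issuing only one request doesn't always hit the
// first node; the round_robin policy then rotates subsequent calls.
func dialRoundRobin(ctx context.Context, endpoints []string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	shuffled := append([]string(nil), endpoints...)
	rand.Shuffle(len(shuffled), func(i, j int) { shuffled[i], shuffled[j] = shuffled[j], shuffled[i] })

	// A manual resolver hands gRPC the full endpoint list up front.
	r := manual.NewBuilderWithScheme("talos")

	addrs := make([]resolver.Address, 0, len(shuffled))
	for _, ep := range shuffled {
		addrs = append(addrs, resolver.Address{Addr: ep})
	}

	r.InitialState(resolver.State{Addresses: addrs})

	opts = append(opts,
		grpc.WithResolvers(r),
		// Select the round_robin balancer instead of the default pick_first.
		grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
	)

	// Transport credentials (or an insecure option) must come in via opts.
	return grpc.DialContext(ctx, r.Scheme()+":///talos-api", opts...)
}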
78 lines · 3.1 KiB · Go
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package check

import (
	"context"
	"errors"
	"time"

	"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
	"github.com/talos-systems/talos/internal/pkg/conditions"
)

// DefaultClusterChecks returns a set of default Talos cluster readiness checks.
func DefaultClusterChecks() []ClusterCheck {
	return []ClusterCheck{
		// wait for etcd to be healthy on all control plane nodes
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("etcd to be healthy", func(ctx context.Context) error {
				return ServiceHealthAssertion(ctx, cluster, "etcd", WithNodeTypes(runtime.MachineTypeInit, runtime.MachineTypeControlPlane))
			}, 5*time.Minute, 5*time.Second)
		},
		// wait for bootkube to finish on init node
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("bootkube to finish", func(ctx context.Context) error {
				err := ServiceStateAssertion(ctx, cluster, "bootkube", "Finished", "Skipped")
				if err != nil {
					if errors.Is(err, ErrServiceNotFound) {
						return nil
					}

					return err
				}

				return nil
			}, 5*time.Minute, 5*time.Second)
		},
		// wait for apid to be ready on all the nodes
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("apid to be ready", func(ctx context.Context) error {
				return ApidReadyAssertion(ctx, cluster)
			}, 2*time.Minute, 5*time.Second)
		},
		// wait for all the nodes to report in at k8s level
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
				return K8sAllNodesReportedAssertion(ctx, cluster)
			}, 5*time.Minute, 5*time.Second)
		},
		// wait for all the nodes to report ready at k8s level
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
				return K8sAllNodesReadyAssertion(ctx, cluster)
			}, 10*time.Minute, 5*time.Second)
		},
		// wait for HA k8s control plane
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
				return K8sFullControlPlaneAssertion(ctx, cluster)
			}, 5*time.Minute, 5*time.Second)
		},
		// wait for kube-proxy to report ready
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
			}, 3*time.Minute, 5*time.Second)
		},
		// wait for coredns to report ready
		func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition("coredns to report ready", func(ctx context.Context) error {
				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-dns")
			}, 3*time.Minute, 5*time.Second)
		},
	}
}
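As a usage note, the returned checks are plain functions producing conditions, so a caller can iterate and wait on them one by one. Below is a minimal sketch, written as if it lived in the same check package (it needs "fmt" added to the imports), assuming the conditions.Condition interface exposes Wait(ctx) and a String() description; the waitForClusterReady helper is a made-up name, not the actual Talos runner.

// waitForClusterReady (hypothetical helper) runs every default check in order
// and fails fast on the first condition that cannot be satisfied.
func waitForClusterReady(ctx context.Context, cluster ClusterInfo) error {
	for _, clusterCheck := range DefaultClusterChecks() {
		condition := clusterCheck(cluster)

		if err := condition.Wait(ctx); err != nil {
			return fmt.Errorf("cluster check %q failed: %w", condition, err)
		}
	}

	return nil
}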