chore: restructure k8s component health checks

Restructure the k8s component health checks so that K8s health can be
checked independently, without auxiliary components being up.

Signed-off-by: Noel Georgi <git@frezbo.dev>
This commit is contained in:
Noel Georgi 2024-08-19 14:29:14 +05:30
parent e193e7db98
commit c312a46f69
No known key found for this signature in database
GPG Key ID: 21A9F444075C9E36

View File

@ -6,6 +6,7 @@ package check
import (
"context"
"slices"
"time"
"github.com/siderolabs/talos/pkg/conditions"
@ -14,14 +15,10 @@ import (
// DefaultClusterChecks returns a set of default Talos cluster readiness checks.
func DefaultClusterChecks() []ClusterCheck {
return append(PreBootSequenceChecks(), []ClusterCheck{
// wait for all the nodes to report in at k8s level
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
return K8sAllNodesReportedAssertion(ctx, cluster)
}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
},
return slices.Concat(
PreBootSequenceChecks(),
K8sComponentsReadinessChecks(),
[]ClusterCheck{
// wait for all the nodes to report ready at k8s level
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
@ -29,20 +26,6 @@ func DefaultClusterChecks() []ClusterCheck {
}, 10*time.Minute, 5*time.Second)
},
// wait for k8s control plane static pods
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
return K8sControlPlaneStaticPods(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},
// wait for HA k8s control plane
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
return K8sFullControlPlaneAssertion(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},
// wait for kube-proxy to report ready
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
@ -81,7 +64,36 @@ func DefaultClusterChecks() []ClusterCheck {
return K8sAllNodesSchedulableAssertion(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},
}...)
},
)
}
// K8sComponentsReadinessChecks returns the cluster readiness checks that are specific to the k8s components
// themselves being up and running: all nodes reported at the k8s level, control plane static pods running,
// and the control plane components ready.
//
// These checks can be skipped if the cluster is set to use a custom CNI, as they won't be healthy
// until the CNI is up and running.
func K8sComponentsReadinessChecks() []ClusterCheck {
	// wait for all the nodes to report in at k8s level
	allNodesReported := func(cluster ClusterInfo) conditions.Condition {
		assertion := func(ctx context.Context) error {
			return K8sAllNodesReportedAssertion(ctx, cluster)
		}

		// give more time per each attempt, as this check is going to build and cache kubeconfig
		return conditions.PollingCondition("all k8s nodes to report", assertion, 5*time.Minute, 30*time.Second)
	}

	// wait for k8s control plane static pods
	staticPodsRunning := func(cluster ClusterInfo) conditions.Condition {
		assertion := func(ctx context.Context) error {
			return K8sControlPlaneStaticPods(ctx, cluster)
		}

		return conditions.PollingCondition("all control plane static pods to be running", assertion, 5*time.Minute, 5*time.Second)
	}

	// wait for HA k8s control plane
	controlPlaneReady := func(cluster ClusterInfo) conditions.Condition {
		assertion := func(ctx context.Context) error {
			return K8sFullControlPlaneAssertion(ctx, cluster)
		}

		return conditions.PollingCondition("all control plane components to be ready", assertion, 5*time.Minute, 5*time.Second)
	}

	// The order matters to callers: checks are returned in the same sequence as declared above.
	return []ClusterCheck{
		allNodesReported,
		staticPodsRunning,
		controlPlaneReady,
	}
}
// ExtraClusterChecks returns a set of additional Talos cluster readiness checks which work only for newer versions of Talos.