chore: restructure k8s component health checks
Restructure the k8s component health checks so that K8s health can be checked independently, without requiring auxiliary components to be up.

Signed-off-by: Noel Georgi <git@frezbo.dev>
commit c312a46f69 (parent e193e7db98)
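The mechanical core of the change is replacing a single append(a, b...) with slices.Concat (Go 1.22+), which merges any number of slices in order with one allocation. A minimal, self-contained sketch of that pattern; the types and check names here are stand-ins for illustration, not the real ClusterCheck values:

    package main

    import (
    	"fmt"
    	"slices"
    )

    // check is a stand-in for the real ClusterCheck function type.
    type check string

    func preBoot() []check  { return []check{"etcd", "apid"} }
    func k8sComps() []check { return []check{"nodes report", "static pods"} }

    func main() {
    	// Before: append(preBoot(), extra...) merges only two slices at a time.
    	// After: slices.Concat merges any number of slices in one call.
    	all := slices.Concat(preBoot(), k8sComps(), []check{"nodes ready", "schedulable"})

    	fmt.Println(all) // [etcd apid nodes report static pods nodes ready schedulable]
    }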
@@ -6,6 +6,7 @@ package check
 
 import (
 	"context"
+	"slices"
 	"time"
 
 	"github.com/siderolabs/talos/pkg/conditions"
@@ -14,7 +15,64 @@ import (
 
 // DefaultClusterChecks returns a set of default Talos cluster readiness checks.
 func DefaultClusterChecks() []ClusterCheck {
-	return append(PreBootSequenceChecks(), []ClusterCheck{
+	return slices.Concat(
+		PreBootSequenceChecks(),
+		K8sComponentsReadinessChecks(),
+		[]ClusterCheck{
+			// wait for all the nodes to report ready at k8s level
+			func(cluster ClusterInfo) conditions.Condition {
+				return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
+					return K8sAllNodesReadyAssertion(ctx, cluster)
+				}, 10*time.Minute, 5*time.Second)
+			},
+
+			// wait for kube-proxy to report ready
+			func(cluster ClusterInfo) conditions.Condition {
+				return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
+					present, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
+					if err != nil {
+						return err
+					}
+
+					if !present {
+						return conditions.ErrSkipAssertion
+					}
+
+					return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
+				}, 3*time.Minute, 5*time.Second)
+			},
+
+			// wait for coredns to report ready
+			func(cluster ClusterInfo) conditions.Condition {
+				return conditions.PollingCondition("coredns to report ready", func(ctx context.Context) error {
+					present, err := ReplicaSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")
+					if err != nil {
+						return err
+					}
+
+					if !present {
+						return conditions.ErrSkipAssertion
+					}
+
+					return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-dns")
+				}, 3*time.Minute, 5*time.Second)
+			},
+
+			// wait for all the nodes to be schedulable
+			func(cluster ClusterInfo) conditions.Condition {
+				return conditions.PollingCondition("all k8s nodes to report schedulable", func(ctx context.Context) error {
+					return K8sAllNodesSchedulableAssertion(ctx, cluster)
+				}, 5*time.Minute, 5*time.Second)
+			},
+		},
+	)
+}
+
+// K8sComponentsReadinessChecks returns a set of K8s cluster readiness checks which are specific to the k8s components
+// being up and running. This test can be skipped if the cluster is set to use a custom CNI, as the checks won't be healthy
+// until the CNI is up and running.
+func K8sComponentsReadinessChecks() []ClusterCheck {
+	return []ClusterCheck{
 		// wait for all the nodes to report in at k8s level
 		func(cluster ClusterInfo) conditions.Condition {
 			return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
@@ -22,13 +80,6 @@ func DefaultClusterChecks() []ClusterCheck {
 			}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
 		},
 
-		// wait for all the nodes to report ready at k8s level
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
-				return K8sAllNodesReadyAssertion(ctx, cluster)
-			}, 10*time.Minute, 5*time.Second)
-		},
-
 		// wait for k8s control plane static pods
 		func(cluster ClusterInfo) conditions.Condition {
 			return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
@@ -42,46 +93,7 @@ func DefaultClusterChecks() []ClusterCheck {
 				return K8sFullControlPlaneAssertion(ctx, cluster)
 			}, 5*time.Minute, 5*time.Second)
 		},
-
-		// wait for kube-proxy to report ready
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
-				present, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
-				if err != nil {
-					return err
-				}
-
-				if !present {
-					return conditions.ErrSkipAssertion
-				}
-
-				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
-			}, 3*time.Minute, 5*time.Second)
-		},
-
-		// wait for coredns to report ready
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("coredns to report ready", func(ctx context.Context) error {
-				present, err := ReplicaSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")
-				if err != nil {
-					return err
-				}
-
-				if !present {
-					return conditions.ErrSkipAssertion
-				}
-
-				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-dns")
-			}, 3*time.Minute, 5*time.Second)
-		},
-
-		// wait for all the nodes to be schedulable
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("all k8s nodes to report schedulable", func(ctx context.Context) error {
-				return K8sAllNodesSchedulableAssertion(ctx, cluster)
-			}, 5*time.Minute, 5*time.Second)
-		},
-	}...)
+	}
 }
 
 // ExtraClusterChecks returns a set of additional Talos cluster readiness checks which work only for newer versions of Talos.
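Per the new doc comment, K8sComponentsReadinessChecks can be left out for clusters running a custom CNI, since those checks cannot become healthy until the CNI is deployed. A sketch of how a caller might exploit the split; check.Wait and check.StderrReporter are assumed to exist in pkg/cluster/check with roughly these signatures (an assumption, not shown in this diff), so verify against the actual package:

    package health

    import (
    	"context"
    	"slices"
    	"time"

    	"github.com/siderolabs/talos/pkg/cluster/check"
    )

    // waitCustomCNI runs readiness checks for a cluster with a custom CNI.
    // K8sComponentsReadinessChecks is deliberately omitted: with a custom CNI
    // those checks stay unhealthy until the CNI itself is deployed.
    func waitCustomCNI(ctx context.Context, cluster check.ClusterInfo) error {
    	ctx, cancel := context.WithTimeout(ctx, 20*time.Minute)
    	defer cancel()

    	checks := slices.Concat(
    		check.PreBootSequenceChecks(),
    		check.ExtraClusterChecks(),
    	)

    	// check.Wait / check.StderrReporter are assumed helpers here.
    	return check.Wait(ctx, cluster, checks, check.StderrReporter())
    }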