mirror of https://github.com/siderolabs/talos.git (synced 2025-10-31 08:21:25 +01:00)
	chore: restructure k8s component health checks
Restructure the k8s component health checks so that Kubernetes health can be checked independently, without auxiliary components being up.

Signed-off-by: Noel Georgi <git@frezbo.dev>
parent e193e7db98
commit c312a46f69
@@ -6,6 +6,7 @@ package check
 
 import (
 	"context"
+	"slices"
 	"time"
 
 	"github.com/siderolabs/talos/pkg/conditions"
@@ -14,14 +15,10 @@ import (
 
 // DefaultClusterChecks returns a set of default Talos cluster readiness checks.
 func DefaultClusterChecks() []ClusterCheck {
-	return append(PreBootSequenceChecks(), []ClusterCheck{
-		// wait for all the nodes to report in at k8s level
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
-				return K8sAllNodesReportedAssertion(ctx, cluster)
-			}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
-		},
-
+	return slices.Concat(
+		PreBootSequenceChecks(),
+		K8sComponentsReadinessChecks(),
+		[]ClusterCheck{
 			// wait for all the nodes to report ready at k8s level
 			func(cluster ClusterInfo) conditions.Condition {
 				return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
@@ -29,20 +26,6 @@ func DefaultClusterChecks() []ClusterCheck {
 				}, 10*time.Minute, 5*time.Second)
 			},
 
-		// wait for k8s control plane static pods
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
-				return K8sControlPlaneStaticPods(ctx, cluster)
-			}, 5*time.Minute, 5*time.Second)
-		},
-
-		// wait for HA k8s control plane
-		func(cluster ClusterInfo) conditions.Condition {
-			return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
-				return K8sFullControlPlaneAssertion(ctx, cluster)
-			}, 5*time.Minute, 5*time.Second)
-		},
-
 			// wait for kube-proxy to report ready
 			func(cluster ClusterInfo) conditions.Condition {
 				return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
@@ -81,7 +64,36 @@ func DefaultClusterChecks() []ClusterCheck {
 					return K8sAllNodesSchedulableAssertion(ctx, cluster)
 				}, 5*time.Minute, 5*time.Second)
 			},
-	}...)
+		},
+	)
+}
+
+// K8sComponentsReadinessChecks returns a set of K8s cluster readiness checks which are specific to the k8s components
+// being up and running. This test can be skipped if the cluster is set to use a custom CNI, as the checks won't be healthy
+// until the CNI is up and running.
+func K8sComponentsReadinessChecks() []ClusterCheck {
+	return []ClusterCheck{
+		// wait for all the nodes to report in at k8s level
+		func(cluster ClusterInfo) conditions.Condition {
+			return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
+				return K8sAllNodesReportedAssertion(ctx, cluster)
+			}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
+		},
+
+		// wait for k8s control plane static pods
+		func(cluster ClusterInfo) conditions.Condition {
+			return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
+				return K8sControlPlaneStaticPods(ctx, cluster)
+			}, 5*time.Minute, 5*time.Second)
+		},
+
+		// wait for HA k8s control plane
+		func(cluster ClusterInfo) conditions.Condition {
+			return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
+				return K8sFullControlPlaneAssertion(ctx, cluster)
+			}, 5*time.Minute, 5*time.Second)
+		},
+	}
 }
 
 // ExtraClusterChecks returns a set of additional Talos cluster readiness checks which work only for newer versions of Talos.
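The point of the restructure is that K8sComponentsReadinessChecks can now be run on its own, for example to verify the control plane before a custom CNI is installed, as the new doc comment notes. Below is a minimal sketch of such a standalone health check: check.Wait, check.StderrReporter, and the check.ClusterInfo argument are assumptions based on the pkg/cluster/check package, and waitForK8sComponents is a hypothetical wrapper, so exact names and signatures may differ between Talos versions.

package health

import (
	"context"
	"time"

	"github.com/siderolabs/talos/pkg/cluster/check"
)

// waitForK8sComponents runs only the k8s component readiness checks,
// skipping the node-readiness, kube-proxy, and DNS assertions that
// cannot pass until a CNI is up. (Hypothetical wrapper; see note above.)
func waitForK8sComponents(ctx context.Context, cluster check.ClusterInfo) error {
	ctx, cancel := context.WithTimeout(ctx, 20*time.Minute)
	defer cancel()

	// Assumed helpers: check.Wait runs each ClusterCheck in order, and
	// check.StderrReporter prints progress to stderr.
	return check.Wait(ctx, cluster, check.K8sComponentsReadinessChecks(), check.StderrReporter())
}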
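One detail worth calling out in the DefaultClusterChecks change: append(PreBootSequenceChecks(), ...) may write into spare capacity of the slice returned by PreBootSequenceChecks, so the result can alias its backing array, whereas slices.Concat (standard library, Go 1.22+) always allocates a fresh slice and leaves its inputs untouched. A self-contained illustration, using placeholder check names rather than the real checks from the package:

package main

import (
	"fmt"
	"slices"
)

func main() {
	// Placeholder check names, for illustration only.
	pre := make([]string, 0, 8) // spare capacity, like a slice a callee might return
	pre = append(pre, "etcd healthy")

	// append reuses pre's spare capacity: viaAppend shares pre's
	// backing array, so writes through one are visible via the other.
	viaAppend := append(pre, "nodes reported")

	// slices.Concat always allocates a new backing array.
	viaConcat := slices.Concat(pre, []string{"nodes reported"})

	viaAppend[0] = "mutated"
	fmt.Println(pre[0])       // "mutated": aliased with viaAppend
	fmt.Println(viaConcat[0]) // "etcd healthy": independent copy
}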