clusterStart: sequential and ordered node starts & nodeCreate: do not copy status
commit 1c61130fdf (parent b2162b1618)
@@ -50,7 +50,7 @@ func NewCmdNodeCreate() *cobra.Command {
             nodes, cluster := parseCreateNodeCmd(cmd, args)
             if err := k3dc.NodeAddToClusterMulti(cmd.Context(), runtimes.SelectedRuntime, nodes, cluster, createNodeOpts); err != nil {
                 log.Errorf("Failed to add nodes to cluster '%s'", cluster.Name)
-                log.Errorln(err)
+                log.Fatalln(err)
             }
         },
     }
@@ -802,97 +802,106 @@ func generateNodeName(cluster string, role k3d.Role, suffix int) string {
 func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster, startClusterOpts types.ClusterStartOpts) error {
     log.Infof("Starting cluster '%s'", cluster.Name)
 
-    start := time.Now()
-
     if startClusterOpts.Timeout > 0*time.Second {
         var cancel context.CancelFunc
         ctx, cancel = context.WithTimeout(ctx, startClusterOpts.Timeout)
         defer cancel()
     }
 
+    // sort the nodes into categories
+    var initNode *k3d.Node
+    var servers []*k3d.Node
+    var agents []*k3d.Node
+    var aux []*k3d.Node
+    for _, n := range cluster.Nodes {
+        if n.Role == k3d.ServerRole {
+            if n.ServerOpts.IsInit {
+                initNode = n
+                continue
+            }
+            servers = append(servers, n)
+        } else if n.Role == k3d.AgentRole {
+            agents = append(agents, n)
+        } else {
+            aux = append(aux, n)
+        }
+    }
+
+    log.Infoln("Servers before sort:")
+    for i, n := range servers {
+        log.Infof("Server %d - %s", i, n.Name)
+    }
+    sort.Slice(servers, func(i, j int) bool {
+        return servers[i].Name < servers[j].Name
+    })
+    log.Infoln("Servers after sort:")
+    for i, n := range servers {
+        log.Infof("Server %d - %s", i, n.Name)
+    }
+
     /*
      * Init Node
      */
-    for _, n := range cluster.Nodes {
-        if n.Role == k3d.ServerRole && n.ServerOpts.IsInit {
-            if err := NodeStart(ctx, runtime, n, k3d.NodeStartOpts{
-                Wait:      true, // always wait for the init node
-                NodeHooks: startClusterOpts.NodeHooks,
-            }); err != nil {
-                return fmt.Errorf("Failed to start initializing server node: %+v", err)
-            }
-            break
-        }
-    }
+    if initNode != nil {
+        log.Infoln("Starting the initializing server...")
+        if err := NodeStart(ctx, runtime, initNode, k3d.NodeStartOpts{
+            Wait:            true, // always wait for the init node
+            NodeHooks:       startClusterOpts.NodeHooks,
+            ReadyLogMessage: "Running kube-apiserver", // initNode means, that we're using etcd -> this will need quorum, so "k3s is up and running" won't happen right now
+        }); err != nil {
+            return fmt.Errorf("Failed to start initializing server node: %+v", err)
+        }
+    }
 
     /*
-     * Other Nodes
+     * Server Nodes
      */
-    failed := 0
-    var serverlb *k3d.Node
-    for _, node := range cluster.Nodes {
-
-        // skip the LB, because we want to start it last
-        if node.Role == k3d.LoadBalancerRole {
-            serverlb = node
-            continue
-        }
-
-        // skip init node here, as it should be running already
-        if node == cluster.InitNode || node.ServerOpts.IsInit {
-            continue
-        }
-
-        // check if node is running already to avoid waiting forever when checking for the node log message
-        if !node.State.Running {
-
-            nodeStartOpts := k3d.NodeStartOpts{
-                NodeHooks: startClusterOpts.NodeHooks,
-            }
-
-            if node.Role == k3d.ServerRole && startClusterOpts.WaitForServer {
-                nodeStartOpts.Wait = true
-            }
-
-            // start node
-            if err := NodeStart(ctx, runtime, node, nodeStartOpts); err != nil {
-                log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
-                failed++
-                continue
-            }
-
-        } else {
-            log.Infof("Node '%s' already running", node.Name)
-        }
-    }
-
-    // start serverlb
-    if serverlb != nil {
-        if !serverlb.State.Running {
-            log.Debugln("Starting serverlb...")
-            if err := runtime.StartNode(ctx, serverlb); err != nil { // FIXME: we could run into a nullpointer exception here
-                log.Warningf("Failed to start serverlb '%s' (try to start it manually): %+v", serverlb.Name, err)
-                failed++
-            }
-            // TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
-            // ... by scanning for this line in logs and restarting the container in case it appears
-            log.Debugf("Starting to wait for loadbalancer node '%s'", serverlb.Name)
-            readyLogMessage := k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole]
-            if readyLogMessage != "" {
-                if err := NodeWaitForLogMessage(ctx, runtime, serverlb, readyLogMessage, start); err != nil {
-                    return fmt.Errorf("Loadbalancer '%s' failed to get ready: %+v", serverlb.Name, err)
-                }
-            } else {
-                log.Warnf("ClusterStart: Set to wait for node %s to be ready, but there's no target log message defined", serverlb.Name)
-            }
-        } else {
-            log.Infof("Serverlb '%s' already running", serverlb.Name)
-        }
-    }
-
-    if failed > 0 {
-        return fmt.Errorf("Failed to start %d nodes: Try to start them manually", failed)
-    }
+    log.Infoln("Starting servers...")
+    nodeStartOpts := k3d.NodeStartOpts{
+        Wait:      true,
+        NodeHooks: startClusterOpts.NodeHooks,
+    }
+    for _, serverNode := range servers {
+        if err := NodeStart(ctx, runtime, serverNode, nodeStartOpts); err != nil {
+            return fmt.Errorf("Failed to start server %s: %+v", serverNode.Name, err)
+        }
+    }
+
+    /*
+     * Agent Nodes
+     */
+    failedAgents := 0
+    log.Infoln("Starting agents...")
+    for _, agentNode := range agents {
+        if err := NodeStart(ctx, runtime, agentNode, nodeStartOpts); err != nil {
+            log.Warnf("Failed to start agent %s: %+v", agentNode.Name, err)
+            failedAgents++
+        }
+    }
+
+    /*
+     * Auxiliary/Helper Nodes
+     */
+    log.Infoln("Starting helpers...")
+    failedHelpers := 0
+    for _, helperNode := range aux {
+        nodeStartOpts := k3d.NodeStartOpts{}
+        if helperNode.Role == k3d.LoadBalancerRole {
+            nodeStartOpts.Wait = true
+        }
+        if err := NodeStart(ctx, runtime, helperNode, nodeStartOpts); err != nil {
+            log.Warnf("Failed to start helper %s: %+v", helperNode.Name, err)
+            failedHelpers++
+        }
+    }
+
+    if failedAgents+failedHelpers > 0 {
+        log.Warnf("%d non-critical (agent or helper) nodes failed to start. You may want to start them manually.", failedAgents+failedHelpers)
+    }
 
     return nil
 }
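In short, the rewritten ClusterStart buckets the cluster's nodes by role and starts them in a fixed order: the initializing server first, then the remaining servers sorted by name, then agents, and finally auxiliary/helper nodes such as the load balancer. Servers are started with Wait set, so a server failure aborts the start, while agent and helper failures are only counted and reported as warnings. The following standalone sketch shows just the bucketing and ordering step; the Role and Node types are simplified stand-ins for illustration, not the real k3d API.

package main

import (
	"fmt"
	"sort"
)

// Simplified stand-ins for k3d's node types (illustration only).
type Role string

const (
	ServerRole       Role = "server"
	AgentRole        Role = "agent"
	LoadBalancerRole Role = "loadbalancer"
)

type Node struct {
	Name   string
	Role   Role
	IsInit bool
}

// orderNodes reproduces the bucketing/sorting step from ClusterStart:
// the init server first, then the remaining servers sorted by name,
// then agents, then any auxiliary/helper nodes (e.g. the load balancer).
func orderNodes(nodes []*Node) []*Node {
	var initNode *Node
	var servers, agents, aux []*Node
	for _, n := range nodes {
		switch {
		case n.Role == ServerRole && n.IsInit:
			initNode = n
		case n.Role == ServerRole:
			servers = append(servers, n)
		case n.Role == AgentRole:
			agents = append(agents, n)
		default:
			aux = append(aux, n)
		}
	}
	sort.Slice(servers, func(i, j int) bool { return servers[i].Name < servers[j].Name })

	ordered := []*Node{}
	if initNode != nil {
		ordered = append(ordered, initNode)
	}
	ordered = append(ordered, servers...)
	ordered = append(ordered, agents...)
	ordered = append(ordered, aux...)
	return ordered
}

func main() {
	nodes := []*Node{
		{Name: "k3d-demo-serverlb", Role: LoadBalancerRole},
		{Name: "k3d-demo-agent-0", Role: AgentRole},
		{Name: "k3d-demo-server-1", Role: ServerRole},
		{Name: "k3d-demo-server-0", Role: ServerRole, IsInit: true},
	}
	for _, n := range orderNodes(nodes) {
		fmt.Println(n.Name)
	}
}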
@@ -129,6 +129,10 @@ func NodeAddToCluster(ctx context.Context, runtime runtimes.Runtime, node *k3d.N
         }
     }
 
+    // clear status fields
+    node.State.Running = false
+    node.State.Status = ""
+
     if err := NodeRun(ctx, runtime, node, k3d.NodeCreateOpts{}); err != nil {
         return err
     }
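This status reset matters because of the new early-return in NodeStart (see the hunks below): the node passed to NodeAddToCluster is built by copying the spec of an existing cluster node, so a stale State.Running value of true would make the freshly created node look as if it were already running and the start would be skipped. A tiny illustrative sketch of that interaction, again with simplified stand-in types rather than the real k3d types:

package main

import "fmt"

// Simplified stand-ins for the relevant parts of k3d's node state (illustration only).
type NodeState struct {
	Running bool
	Status  string
}

type Node struct {
	Name  string
	State NodeState
}

// startNode mimics the early-return that NodeStart now performs for running nodes.
func startNode(n *Node) {
	if n.State.Running {
		fmt.Printf("Node %s is already running\n", n.Name)
		return
	}
	fmt.Printf("starting node %s\n", n.Name)
}

func main() {
	// A node spec copied from an existing, running node carries its state along...
	copied := &Node{Name: "k3d-demo-server-1", State: NodeState{Running: true, Status: "running"}}

	// ...so the status fields must be cleared before running it, otherwise the start is skipped.
	copied.State.Running = false
	copied.State.Status = ""

	startNode(copied)
}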
@@ -233,6 +237,14 @@ func NodeRun(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, node
 
 // NodeStart starts an existing node
 func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, nodeStartOpts k3d.NodeStartOpts) error {
+
+    // return early, if the node is already running
+    if node.State.Running {
+        log.Infof("Node %s is already running", node.Name)
+        return nil
+    }
+
+    // execute lifecycle hook actions
     for _, hook := range nodeStartOpts.NodeHooks {
         if hook.Stage == k3d.LifecycleStagePreStart {
             log.Tracef("Node %s: Executing preStartAction '%s'", node.Name, reflect.TypeOf(hook))
@@ -241,6 +253,8 @@ func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, no
             }
         }
     }
+
+    // start the node
     log.Tracef("Starting node '%s'", node.Name)
 
     startTime := time.Now()
@@ -250,10 +264,12 @@ func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, no
     }
 
     if nodeStartOpts.Wait {
-        log.Debugf("Waiting for node %s to get ready", node.Name)
-        readyLogMessage := k3d.ReadyLogMessageByRole[node.Role]
-        if readyLogMessage != "" {
-            if err := NodeWaitForLogMessage(ctx, runtime, node, readyLogMessage, startTime); err != nil {
+        if nodeStartOpts.ReadyLogMessage == "" {
+            nodeStartOpts.ReadyLogMessage = k3d.ReadyLogMessageByRole[node.Role]
+        }
+        if nodeStartOpts.ReadyLogMessage != "" {
+            log.Debugf("Waiting for node %s to get ready (Log: '%s')", node.Name, nodeStartOpts.ReadyLogMessage)
+            if err := NodeWaitForLogMessage(ctx, runtime, node, nodeStartOpts.ReadyLogMessage, startTime); err != nil {
                 return fmt.Errorf("Node %s failed to get ready: %+v", node.Name, err)
             }
         } else {
@@ -224,9 +224,10 @@ type NodeCreateOpts struct {
 
 // NodeStartOpts describes a set of options one can set when (re-)starting a node
 type NodeStartOpts struct {
     Wait            bool
     Timeout         time.Duration
     NodeHooks       []NodeHook `yaml:"nodeHooks,omitempty" json:"nodeHooks,omitempty"`
+    ReadyLogMessage string
 }
 
 // NodeDeleteOpts describes a set of options one can set when deleting a node
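Taken together, the readiness wait in NodeStart is now driven entirely by NodeStartOpts.ReadyLogMessage: a caller may set it explicitly (as ClusterStart does for the init server with "Running kube-apiserver"), and an empty value falls back to the per-role default from k3d.ReadyLogMessageByRole. The sketch below mirrors that resolution logic with simplified stand-in types; the map values are illustrative, only the server message is taken from the diff above.

package main

import "fmt"

// Simplified stand-ins (illustration only); k3d keeps the per-role defaults in
// k3d.ReadyLogMessageByRole. The load balancer message here is a placeholder.
type Role string

const (
	ServerRole       Role = "server"
	LoadBalancerRole Role = "loadbalancer"
)

var readyLogMessageByRole = map[Role]string{
	ServerRole:       "k3s is up and running",
	LoadBalancerRole: "lb is ready", // placeholder value for illustration
}

type NodeStartOpts struct {
	Wait            bool
	ReadyLogMessage string
}

// resolveReadyLogMessage mirrors the new NodeStart behavior: an explicit
// ReadyLogMessage wins, otherwise the node role's default is used.
func resolveReadyLogMessage(role Role, opts NodeStartOpts) string {
	if opts.ReadyLogMessage == "" {
		opts.ReadyLogMessage = readyLogMessageByRole[role]
	}
	return opts.ReadyLogMessage
}

func main() {
	// The init server overrides the default: with embedded etcd the usual
	// "k3s is up and running" line only appears once quorum is reached.
	fmt.Println(resolveReadyLogMessage(ServerRole, NodeStartOpts{Wait: true, ReadyLogMessage: "Running kube-apiserver"}))

	// A regular server falls back to the role default.
	fmt.Println(resolveReadyLogMessage(ServerRole, NodeStartOpts{Wait: true}))
}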