properly handle initNode and normalize waiting for log messages to determine successful node starts
parent bad77f0027
commit e7c43df434
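The diff below normalizes how a successful node start is detected: instead of ad-hoc log polling, each call site looks up the role's ready-log message and only waits when one is defined. A minimal sketch of that shared guard, using identifiers that appear in the diff (k3d.ReadyLogMessageByRole, NodeWaitForLogMessage); the wrapper name waitForReadyLogMessage is hypothetical and not part of the commit:

// Sketch only: mirrors the guard this commit applies in ClusterStart, NodeStart,
// NodeAddToClusterMulti and NodeCreateMulti.
func waitForReadyLogMessage(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, since time.Time) error {
	readyLogMessage := k3d.ReadyLogMessageByRole[node.Role]
	if readyLogMessage == "" {
		// no target log message defined for this role: warn and skip waiting
		log.Warnf("Set to wait for node %s to be ready, but there's no target log message defined", node.Name)
		return nil
	}
	// block until the role-specific ready message appears in the node logs
	return NodeWaitForLogMessage(ctx, runtime, node, readyLogMessage, since)
}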
@@ -22,7 +22,6 @@ THE SOFTWARE.
package client

import (
	"bytes"
	"context"
	"errors"
	"fmt"
@@ -392,6 +391,7 @@ ClusterCreatOpts:
		// the cluster has an init server node, but its not this one, so connect it to the init node
		if cluster.InitNode != nil && !node.ServerOpts.IsInit {
			node.Env = append(node.Env, fmt.Sprintf("K3S_URL=%s", connectionURL))
			node.Labels[k3d.LabelServerIsInit] = "false" // set label, that this server node is not the init server
		}

	} else if node.Role == k3d.AgentRole {
@@ -425,6 +425,10 @@ ClusterCreatOpts:
	if cluster.InitNode != nil {
		log.Infoln("Creating initializing server node")
		cluster.InitNode.Args = append(cluster.InitNode.Args, "--cluster-init")
		if cluster.InitNode.Labels == nil {
			cluster.InitNode.Labels = map[string]string{}
		}
		cluster.InitNode.Labels[k3d.LabelServerIsInit] = "true" // set label, that this server node is the init server

		// in case the LoadBalancer was disabled, expose the API Port on the initializing server node
		if clusterCreateOpts.DisableLoadBalancer {
@@ -797,39 +801,11 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
	for _, n := range cluster.Nodes {
		if n.Role == k3d.ServerRole && n.ServerOpts.IsInit {
			if err := NodeStart(ctx, runtime, n, k3d.NodeStartOpts{
				Wait: true, // always wait for the init node
				NodeHooks: startClusterOpts.NodeHooks,
			}); err != nil {
				return fmt.Errorf("Failed to start initializing server node: %+v", err)
			}
			// wait for the initnode to come up before doing anything else
			for {
				select {
				case <-ctx.Done():
					log.Errorln("Failed to bring up initializing server node in time")
					return fmt.Errorf(">>> %w", ctx.Err())
				default:
				}
				log.Debugln("Waiting for initializing server node...")
				logreader, err := runtime.GetNodeLogs(ctx, cluster.InitNode, time.Time{})
				if err != nil {
					if logreader != nil {
						logreader.Close()
					}
					log.Errorln(err)
					log.Errorln("Failed to get logs from the initializig server node.. waiting for 3 seconds instead")
					time.Sleep(3 * time.Second)
					break
				}
				defer logreader.Close()
				buf := new(bytes.Buffer)
				nRead, _ := buf.ReadFrom(logreader)
				logreader.Close()
				if nRead > 0 && strings.Contains(buf.String(), k3d.ReadyLogMessageByRole[k3d.ServerRole]) {
					log.Debugln("Initializing server node is up... continuing")
					break
				}
				time.Sleep(time.Second)
			}
			break
		}
	}
@@ -847,24 +823,27 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
			continue
		}

		// check if node is running already to avoid waiting forever when checking for the node log message
		if !node.State.Running {

			// start node
			if err := NodeStart(ctx, runtime, node, k3d.NodeStartOpts{
				NodeHooks: startClusterOpts.NodeHooks,
			}); err != nil {
				log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
				failed++
		// skip init node here, as it should be running already
		if node == cluster.InitNode || node.ServerOpts.IsInit {
			continue
		}

		// wait for this server node to be ready (by checking the logs for a specific log message)
		if node.Role == k3d.ServerRole && startClusterOpts.WaitForServer {
			log.Debugf("Waiting for server node '%s' to get ready", node.Name)
			if err := NodeWaitForLogMessage(ctx, runtime, node, k3d.ReadyLogMessageByRole[k3d.ServerRole], start); err != nil {
				return fmt.Errorf("Server node '%s' failed to get ready: %+v", node.Name, err)
		// check if node is running already to avoid waiting forever when checking for the node log message
		if !node.State.Running {

			nodeStartOpts := k3d.NodeStartOpts{
				NodeHooks: startClusterOpts.NodeHooks,
			}

			if node.Role == k3d.ServerRole && startClusterOpts.WaitForServer {
				nodeStartOpts.Wait = true
			}

			// start node
			if err := NodeStart(ctx, runtime, node, nodeStartOpts); err != nil {
				log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
				failed++
				continue
			}

		} else {
@@ -883,9 +862,14 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
		// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
		// ... by scanning for this line in logs and restarting the container in case it appears
		log.Debugf("Starting to wait for loadbalancer node '%s'", serverlb.Name)
		if err := NodeWaitForLogMessage(ctx, runtime, serverlb, k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole], start); err != nil {
		readyLogMessage := k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole]
		if readyLogMessage != "" {
			if err := NodeWaitForLogMessage(ctx, runtime, serverlb, readyLogMessage, start); err != nil {
				return fmt.Errorf("Loadbalancer '%s' failed to get ready: %+v", serverlb.Name, err)
			}
		} else {
			log.Warnf("ClusterStart: Set to wait for node %s to be ready, but there's no target log message defined", serverlb.Name)
		}
	} else {
		log.Infof("Serverlb '%s' already running", serverlb.Name)
	}

@@ -161,7 +161,12 @@ func NodeAddToClusterMulti(ctx context.Context, runtime runtimes.Runtime, nodes
			currentNode := node
			nodeWaitGroup.Go(func() error {
				log.Debugf("Starting to wait for node '%s'", currentNode.Name)
				return NodeWaitForLogMessage(ctx, runtime, currentNode, k3d.ReadyLogMessageByRole[currentNode.Role], time.Time{})
				readyLogMessage := k3d.ReadyLogMessageByRole[currentNode.Role]
				if readyLogMessage != "" {
					return NodeWaitForLogMessage(ctx, runtime, currentNode, readyLogMessage, time.Time{})
				}
				log.Warnf("NodeAddToClusterMulti: Set to wait for node %s to get ready, but there's no target log message defined", currentNode.Name)
				return nil
			})
		}
	}
@@ -191,7 +196,12 @@ func NodeCreateMulti(ctx context.Context, runtime runtimes.Runtime, nodes []*k3d
			currentNode := node
			nodeWaitGroup.Go(func() error {
				log.Debugf("Starting to wait for node '%s'", currentNode.Name)
				return NodeWaitForLogMessage(ctx, runtime, currentNode, k3d.ReadyLogMessageByRole[currentNode.Role], time.Time{})
				readyLogMessage := k3d.ReadyLogMessageByRole[currentNode.Role]
				if readyLogMessage != "" {
					return NodeWaitForLogMessage(ctx, runtime, currentNode, readyLogMessage, time.Time{})
				}
				log.Warnf("NodeCreateMulti: Set to wait for node %s to get ready, but there's no target log message defined", currentNode.Name)
				return nil
			})
		}
	}
@@ -226,17 +236,32 @@ func NodeRun(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, node
func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, nodeStartOpts k3d.NodeStartOpts) error {
	for _, hook := range nodeStartOpts.NodeHooks {
		if hook.Stage == k3d.LifecycleStagePreStart {
			log.Tracef("Executing preStartAction '%s'", reflect.TypeOf(hook))
			log.Tracef("Node %s: Executing preStartAction '%s'", node.Name, reflect.TypeOf(hook))
			if err := hook.Action.Run(ctx, node); err != nil {
				log.Errorf("Failed executing preStartAction '%+v': %+v", hook, err)
				log.Errorf("Node %s: Failed executing preStartAction '%+v': %+v", node.Name, hook, err)
			}
		}
	}
	log.Tracef("Starting node '%s'", node.Name)

	startTime := time.Now()
	if err := runtime.StartNode(ctx, node); err != nil {
		log.Errorf("Failed to start node *'%s'", node.Name)
		log.Errorf("Failed to start node '%s'", node.Name)
		return err
	}

	if nodeStartOpts.Wait {
		log.Debugf("Waiting for node %s to get ready", node.Name)
		readyLogMessage := k3d.ReadyLogMessageByRole[node.Role]
		if readyLogMessage != "" {
			if err := NodeWaitForLogMessage(ctx, runtime, node, readyLogMessage, startTime); err != nil {
				return fmt.Errorf("Node %s failed to get ready: %+v", node.Name, err)
			}
		} else {
			log.Warnf("NodeStart: Set to wait for node %s to be ready, but there's no target log message defined", node.Name)
		}
	}

	return nil
}
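With Wait now a field on k3d.NodeStartOpts, callers can delegate the readiness check to NodeStart instead of reading container logs themselves. A hedged usage sketch (the fields and functions come from the diff above; the surrounding variables are illustrative):

	// start a server node and block until its role-specific ready log message
	// appears; NodeStart performs the wait because Wait is set
	if err := NodeStart(ctx, runtime, node, k3d.NodeStartOpts{
		Wait:      true,
		NodeHooks: startClusterOpts.NodeHooks,
	}); err != nil {
		return fmt.Errorf("Failed to start node '%s': %+v", node.Name, err)
	}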
@@ -144,7 +144,7 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d

	// first, make sure, that it's actually a k3d managed container by checking if it has all the default labels
	for k, v := range k3d.DefaultObjectLabels {
		log.Tracef("TranslateContainerDetailsToNode: Checking for default object label %s=%s", k, v)
		log.Tracef("TranslateContainerDetailsToNode: Checking for default object label %s=%s on container %s", k, v, containerDetails.Name)
		found := false
		for lk, lv := range containerDetails.Config.Labels {
			if lk == k && lv == v {
@@ -176,8 +176,30 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
	}
	orderedNetworks = append(orderedNetworks, otherNetworks...)

	// serverOpts
	/**
	 * ServerOpts
	 */

	// IsInit
	serverOpts := k3d.ServerOpts{IsInit: false}
	clusterInitFlagSet := false
	for _, arg := range containerDetails.Args {
		if strings.Contains(arg, "--cluster-init") {
			clusterInitFlagSet = true
			break
		}
	}
	if serverIsInitLabel, ok := containerDetails.Config.Labels[k3d.LabelServerIsInit]; ok {
		if serverIsInitLabel == "true" {
			if !clusterInitFlagSet {
				log.Errorf("Container %s has label %s=true, but the args do not contain the --cluster-init flag", containerDetails.Name, k3d.LabelServerIsInit)
			} else {
				serverOpts.IsInit = true
			}
		}
	}

	// Kube API
	serverOpts.KubeAPI = &k3d.ExposureOpts{}
	for k, v := range containerDetails.Config.Labels {
		if k == k3d.LabelServerAPIHostIP {
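The TranslateContainerDetailsToNode hunk above rebuilds ServerOpts.IsInit from two independent signals, the k3d.server.init label and the --cluster-init argument, and only trusts the label when the flag confirms it. A condensed sketch of that consistency check (the helper name isInitServer is hypothetical, and the error logging from the diff is omitted):

// isInitServer mirrors the check above: a container counts as the init server
// only if the k3d.server.init label and the --cluster-init arg agree.
func isInitServer(labels map[string]string, args []string) bool {
	clusterInitFlagSet := false
	for _, arg := range args {
		if strings.Contains(arg, "--cluster-init") {
			clusterInitFlagSet = true
			break
		}
	}
	return labels[k3d.LabelServerIsInit] == "true" && clusterInitFlagSet
}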
@@ -121,6 +121,7 @@ const (
	LabelServerAPIPort string = "k3d.server.api.port"
	LabelServerAPIHost string = "k3d.server.api.host"
	LabelServerAPIHostIP string = "k3d.server.api.hostIP"
	LabelServerIsInit string = "k3d.server.init"
	LabelRegistryHost string = "k3d.registry.host"
	LabelRegistryHostIP string = "k3d.registry.hostIP"
	LabelRegistryPortExternal string = "k3s.registry.port.external"