This commit is contained in:
parent
3b52e0407e
commit
2d8d60b005
@ -122,6 +122,7 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
|
||||
Labels: map[string]string{
|
||||
k3d.LabelRole: roleStr,
|
||||
},
|
||||
Restart: true,
|
||||
}
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ func ClusterCreate(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clus
|
||||
|
||||
node.Name = generateNodeName(cluster.Name, node.Role, suffix)
|
||||
node.Network = cluster.Network.Name
|
||||
|
||||
node.Restart = true
|
||||
node.GPURequest = cluster.CreateClusterOpts.GPURequest
|
||||
|
||||
// create node
|
||||
@ -343,6 +343,7 @@ func ClusterCreate(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clus
|
||||
Role: k3d.LoadBalancerRole,
|
||||
Labels: k3d.DefaultObjectLabels, // TODO: createLoadBalancer: add more expressive labels
|
||||
Network: cluster.Network.Name,
|
||||
Restart: true,
|
||||
}
|
||||
cluster.Nodes = append(cluster.Nodes, lbNode) // append lbNode to list of cluster nodes, so it will be considered during rollback
|
||||
log.Infof("Creating LoadBalancer '%s'", lbNode.Name)
|
||||
|
@ -324,7 +324,7 @@ func NodeGet(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node) (*k3
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// NodeWaitForLogMessage follows the logs of a node container and returns if it finds a specific line in there (or timeout is reached)
|
||||
// NodeWaitForLogMessage follows the logs of a node container and returns if it finds a specific line in there (or timeout is reached)
|
||||
func NodeWaitForLogMessage(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, message string, since time.Time) error {
|
||||
for {
|
||||
select {
|
||||
@ -353,8 +353,15 @@ func NodeWaitForLogMessage(ctx context.Context, runtime runtimes.Runtime, node *
|
||||
if nRead > 0 && strings.Contains(output, message) {
|
||||
break
|
||||
}
|
||||
|
||||
// check if the container is restarting
|
||||
running, status, _ := runtime.GetNodeStatus(ctx, node)
|
||||
if running && status == k3d.NodeStatusRestarting {
|
||||
return fmt.Errorf("Node %s is restarting, early exit to avoid crash loop", node.Name)
|
||||
}
|
||||
|
||||
time.Sleep(500 * time.Millisecond) // wait for half a second to avoid overloading docker (error `socket: too many open files`)
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond) // wait for half a second to avoid overloading docker (error `socket: too many open files`)
|
||||
log.Debugf("Finished waiting for log message '%s' from node '%s'", message, node.Name)
|
||||
return nil
|
||||
}
|
||||
|
@ -118,6 +118,11 @@ func (d Containerd) GetNode(ctx context.Context, node *k3d.Node) (*k3d.Node, err
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
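// GetNodeStatus returns whether a node is running and its status string (Running, Started, etc.); this containerd implementation is a stub that always returns (true, "", nil).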
|
||||
func (d Containerd) GetNodeStatus(ctx context.Context, node *k3d.Node) (bool, string, error) {
|
||||
return true, "", nil
|
||||
}
|
||||
|
||||
// GetNodeLogs returns the logs from a given node
|
||||
func (d Containerd) GetNodeLogs(ctx context.Context, node *k3d.Node, since time.Time) (io.ReadCloser, error) {
|
||||
return nil, nil
|
||||
|
@ -55,6 +55,7 @@ type Runtime interface {
|
||||
DeleteNode(context.Context, *k3d.Node) error
|
||||
GetNodesByLabel(context.Context, map[string]string) ([]*k3d.Node, error)
|
||||
GetNode(context.Context, *k3d.Node) (*k3d.Node, error)
|
||||
GetNodeStatus(context.Context, *k3d.Node) (bool, string, error)
|
||||
CreateNetworkIfNotPresent(context.Context, string) (string, bool, error) // @return NETWORK_NAME, EXISTS, ERROR
|
||||
GetKubeconfig(context.Context, *k3d.Node) (io.ReadCloser, error)
|
||||
DeleteNetwork(context.Context, string) error
|
||||
|
@ -54,6 +54,9 @@ var ReadyLogMessageByRole = map[Role]string{
|
||||
LoadBalancerRole: "start worker processes",
|
||||
}
|
||||
|
||||
// NodeStatusRestarting defines the status string that signals the node container is restarting
|
||||
const NodeStatusRestarting = "restarting"
|
||||
|
||||
// Role defines a k3d node role
|
||||
type Role string
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user