mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-05 12:41:31 +02:00
fix: random failures in cluster health checks
The problem was that some of the health checks sort the list of nodes in place (via `sort.Strings()`). If the cluster info provider returns the original slice, it might be mutated in such a way that it gets corrupted. We never noticed this before CAPI clusters because in our tests IPs are assigned sequentially, so the sort operation is a no-op. Specifically, the problem was with the `Nodes()` function: it returns the `append(controlPlaneNodes, workerNodes...)` slice, which by definition might share memory with the `controlPlaneNodes` slice. For example, if the control plane nodes were `4, 5, 6` and the worker nodes were `3`, the returned slice will be `4, 5, 6, 3`, and it shares memory with the `controlPlaneNodes` slice (the first three items). If we apply `sort` to the returned slice, it is re-ordered as `3, 4, 5, 6`, but because the sort is done in place, the `controlPlaneNodes` slice is now `3, 4, 5`, which is obviously wrong. Fix that by always returning a copy of the slice from the functions implementing the `ClusterInfo` interface. Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
This commit is contained in:
parent
371cbfa7ae
commit
dc6ea74c35
@ -49,9 +49,9 @@ func (cluster *clusterNodes) NodesByType(t machine.Type) []string {
|
||||
|
||||
return []string{cluster.InitNode}
|
||||
case machine.TypeControlPlane:
|
||||
return cluster.ControlPlaneNodes
|
||||
return append([]string(nil), cluster.ControlPlaneNodes...)
|
||||
case machine.TypeJoin:
|
||||
return cluster.WorkerNodes
|
||||
return append([]string(nil), cluster.WorkerNodes...)
|
||||
case machine.TypeUnknown:
|
||||
return nil
|
||||
default:
|
||||
|
@ -81,7 +81,7 @@ type clusterState struct {
|
||||
}
|
||||
|
||||
func (cluster *clusterState) Nodes() []string {
|
||||
return append(cluster.controlPlaneNodes, cluster.workerNodes...)
|
||||
return append([]string(nil), append(cluster.controlPlaneNodes, cluster.workerNodes...)...)
|
||||
}
|
||||
|
||||
func (cluster *clusterState) NodesByType(t machine.Type) []string {
|
||||
@ -89,9 +89,9 @@ func (cluster *clusterState) NodesByType(t machine.Type) []string {
|
||||
case machine.TypeInit:
|
||||
return nil
|
||||
case machine.TypeControlPlane:
|
||||
return cluster.controlPlaneNodes
|
||||
return append([]string(nil), cluster.controlPlaneNodes...)
|
||||
case machine.TypeJoin:
|
||||
return cluster.workerNodes
|
||||
return append([]string(nil), cluster.workerNodes...)
|
||||
case machine.TypeUnknown:
|
||||
return nil
|
||||
default:
|
||||
|
@ -14,7 +14,7 @@ type infoWrapper struct {
|
||||
}
|
||||
|
||||
func (wrapper *infoWrapper) Nodes() []string {
|
||||
return append(wrapper.masterNodes, wrapper.workerNodes...)
|
||||
return append([]string(nil), append(wrapper.masterNodes, wrapper.workerNodes...)...)
|
||||
}
|
||||
|
||||
func (wrapper *infoWrapper) NodesByType(t machine.Type) []string {
|
||||
@ -22,9 +22,9 @@ func (wrapper *infoWrapper) NodesByType(t machine.Type) []string {
|
||||
case machine.TypeInit:
|
||||
return nil
|
||||
case machine.TypeControlPlane:
|
||||
return wrapper.masterNodes
|
||||
return append([]string(nil), wrapper.masterNodes...)
|
||||
case machine.TypeJoin:
|
||||
return wrapper.workerNodes
|
||||
return append([]string(nil), wrapper.workerNodes...)
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user