fix: proper event patching, powercycle vs. poweron

Fixes #145

This fixes event recording, updates `patchServerInUse` to avoid
duplicate updates, and adds a condition to avoid multiple power cycle events.

We might want to expand the set of conditions to report better state to
the user.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
Andrey Smirnov 2020-10-13 00:59:26 +03:00 committed by talos-bot
parent 4d415c8c76
commit fdd2e44de5
16 changed files with 331 additions and 146 deletions
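The core of the change is easiest to see in one place. Below is a minimal sketch (not the actual controller code) of the condition-guarded power handling added in this commit; the PowerClient interface and the ensurePowerCycled helper are assumptions for illustration, while ConditionPowerCycle and the conditions helpers come from the diff further down.

package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"

	metalv1alpha1 "github.com/talos-systems/sidero/app/metal-controller-manager/api/v1alpha1"
)

// PowerClient is an assumed subset of the metal management client used here.
type PowerClient interface {
	IsPoweredOn() (bool, error)
	PowerCycle() error
	PowerOn() error
}

// ensurePowerCycled power cycles (or powers on) a dirty, unallocated server at
// most once: ConditionPowerCycle records that a cycle is in flight, and the
// agent flips it back to True once the wipe completes.
func ensurePowerCycled(server *metalv1alpha1.Server, client PowerClient) error {
	// A power cycle was already requested and has not completed yet; do nothing.
	if conditions.Has(server, metalv1alpha1.ConditionPowerCycle) &&
		conditions.IsFalse(server, metalv1alpha1.ConditionPowerCycle) {
		return nil
	}

	poweredOn, err := client.IsPoweredOn()
	if err != nil {
		return err
	}

	if poweredOn {
		// Running server: reboot it into the PXE-booted agent.
		err = client.PowerCycle()
	} else {
		// Powered-off server: a plain power-on is enough.
		err = client.PowerOn()
	}

	if err != nil {
		return err
	}

	// Mark the condition False ("in progress") so the next reconcile doesn't
	// issue another power cycle.
	conditions.MarkFalse(server, metalv1alpha1.ConditionPowerCycle, "InProgress",
		clusterv1.ConditionSeverityInfo, "Server power cycled for wiping.")

	return nil
}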

View File

@ -12,6 +12,7 @@ rules:
- events
verbs:
- create
- patch
- apiGroups:
- ""
resources:

View File

@ -49,7 +49,7 @@ type MetalMachineReconciler struct {
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=events,verbs=create
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
func (r *MetalMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, err error) {
ctx := context.Background()
@ -211,7 +211,7 @@ func (r *MetalMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, err
}
if !mgmtClient.IsFake() {
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server powered on.")
r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered on.")
}
}
@ -260,7 +260,7 @@ func (r *MetalMachineReconciler) reconcileDelete(ctx context.Context, metalMachi
return ctrl.Result{}, err
}
r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", "Server marked as unallocated")
r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", "Server marked as unallocated.")
}
// Machine is deleted so remove the finalizer.
@ -397,52 +397,36 @@ func (r *MetalMachineReconciler) patchProviderID(ctx context.Context, cluster *c
// patchServerInUse updates a server to mark it as "in use".
func (r *MetalMachineReconciler) patchServerInUse(ctx context.Context, serverClass *metalv1alpha1.ServerClass, serverObj *metalv1alpha1.Server, metalMachine *infrav1.MetalMachine) error {
ref, err := reference.GetReference(r.Scheme, serverObj)
if err != nil {
return err
}
if !serverObj.Status.InUse || serverObj.Status.IsClean {
ref, err := reference.GetReference(r.Scheme, serverObj)
if err != nil {
return err
}
serverObj.Status.InUse = true
serverObj.Status.IsClean = false
serverObj.Status.InUse = true
serverObj.Status.IsClean = false
// nb: we update status and then update the object separately b/c statuses don't seem to get
// updated when doing the whole object below.
if err := r.Status().Update(ctx, serverObj); err != nil {
return err
}
// nb: we update status and then update the object separately b/c statuses don't seem to get
// updated when doing the whole object below.
if err := r.Status().Update(ctx, serverObj); err != nil {
return err
}
r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Server marked as allocated for metalMachine %q", metalMachine.Name))
rollback := func() {
// update failed, roll back Status changes
serverObj.Status.InUse = false
r.Status().Update(ctx, serverObj) //nolint: errcheck
r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Server marked as allocated for metalMachine %q", metalMachine.Name))
}
if serverClass != nil {
for {
serverObj.OwnerReferences = []metav1.OwnerReference{
*metav1.NewControllerRef(serverClass, metalv1alpha1.GroupVersion.WithKind("ServerClass")),
}
patchHelper, err := patch.NewHelper(serverObj, r)
if err != nil {
return err
}
err := r.Update(ctx, serverObj)
if err == nil {
return nil
}
serverObj.OwnerReferences = []metav1.OwnerReference{
*metav1.NewControllerRef(serverClass, metalv1alpha1.GroupVersion.WithKind("ServerClass")),
}
if !apierrors.IsConflict(err) {
rollback()
return err
}
// conflict happened, retry
if err = r.Get(ctx, types.NamespacedName{Namespace: serverObj.Namespace, Name: serverObj.Name}, serverObj); err != nil {
rollback()
return err
}
if err := patchHelper.Patch(ctx, serverObj); err != nil {
return err
}
}
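For readability, here is a hedged sketch of the shape patchServerInUse takes after this change: the retry-on-conflict loop and rollback closure are gone, the status update stays direct (see the "nb:" comment), and the owner reference change goes through cluster-api's patch helper. The markServerInUse name and the omission of event recording are simplifications for illustration only.

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/controller-runtime/pkg/client"

	metalv1alpha1 "github.com/talos-systems/sidero/app/metal-controller-manager/api/v1alpha1"
)

// markServerInUse is a simplified stand-in for patchServerInUse.
func markServerInUse(ctx context.Context, c client.Client, serverClass *metalv1alpha1.ServerClass, server *metalv1alpha1.Server) error {
	// Only touch status when something actually changes, which avoids the
	// duplicate updates mentioned in the commit message.
	if !server.Status.InUse || server.Status.IsClean {
		server.Status.InUse = true
		server.Status.IsClean = false

		if err := c.Status().Update(ctx, server); err != nil {
			return err
		}
	}

	if serverClass != nil {
		// Snapshot the object before mutating it, then send a diff-based patch;
		// this replaces the manual update/rollback/conflict-retry loop.
		patchHelper, err := patch.NewHelper(server, c)
		if err != nil {
			return err
		}

		server.OwnerReferences = []metav1.OwnerReference{
			*metav1.NewControllerRef(serverClass, metalv1alpha1.GroupVersion.WithKind("ServerClass")),
		}

		return patchHelper.Patch(ctx, server)
	}

	return nil
}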

View File

@ -10,6 +10,7 @@ import (
corev1 "k8s.io/api/core/v1"
apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
)
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
@ -100,12 +101,16 @@ type ServerSpec struct {
Accepted bool `json:"accepted"`
}
// ConditionPowerCycle is used to control the powercycle flow.
const ConditionPowerCycle clusterv1.ConditionType = "PowerCycle"
// ServerStatus defines the observed state of Server.
type ServerStatus struct {
Ready bool `json:"ready"`
InUse bool `json:"inUse"`
IsClean bool `json:"isClean"`
Addresses []corev1.NodeAddress `json:"addresses,omitempty"`
Ready bool `json:"ready"`
InUse bool `json:"inUse"`
IsClean bool `json:"isClean"`
Conditions []clusterv1.Condition `json:"conditions,omitempty"`
Addresses []corev1.NodeAddress `json:"addresses,omitempty"`
}
// +kubebuilder:object:root=true
@ -125,6 +130,14 @@ type Server struct {
Status ServerStatus `json:"status,omitempty"`
}
func (s *Server) GetConditions() clusterv1.Conditions {
return s.Status.Conditions
}
func (s *Server) SetConditions(conditions clusterv1.Conditions) {
s.Status.Conditions = conditions
}
// +kubebuilder:object:root=true
// ServerList contains a list of Server.

View File

@ -11,6 +11,7 @@ package v1alpha1
import (
v1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/cluster-api/api/v1alpha3"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
@ -490,6 +491,13 @@ func (in *ServerSpec) DeepCopy() *ServerSpec {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerStatus) DeepCopyInto(out *ServerStatus) {
*out = *in
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
*out = make([]v1alpha3.Condition, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Addresses != nil {
in, out := &in.Addresses, &out.Addresses
*out = make([]v1.NodeAddress, len(*in))

View File

@ -15,6 +15,7 @@ import (
"github.com/talos-systems/go-blockdevice/blockdevice/probe"
"github.com/talos-systems/go-procfs/procfs"
"github.com/talos-systems/go-retry/retry"
"github.com/talos-systems/go-smbios/smbios"
talosnet "github.com/talos-systems/net"
"golang.org/x/sys/unix"
@ -69,18 +70,7 @@ func setup() error {
return nil
}
func create(endpoint string, s *smbios.Smbios) (*api.CreateServerResponse, error) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
if err != nil {
return nil, err
}
defer conn.Close()
c := api.NewAgentClient(conn)
func create(ctx context.Context, client api.AgentClient, s *smbios.Smbios) (*api.CreateServerResponse, error) {
uuid, err := s.SystemInformation().UUID()
if err != nil {
return nil, err
@ -109,56 +99,48 @@ func create(endpoint string, s *smbios.Smbios) (*api.CreateServerResponse, error
req.Hostname = hostname
}
resp, err := c.CreateServer(ctx, req)
if err != nil {
return nil, err
}
var resp *api.CreateServerResponse
return resp, nil
err = retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
resp, err = client.CreateServer(ctx, req)
if err != nil {
return retry.ExpectedError(err)
}
return nil
})
return resp, err
}
func wipe(endpoint string, s *smbios.Smbios) error {
func wipe(ctx context.Context, client api.AgentClient, s *smbios.Smbios) error {
uuid, err := s.SystemInformation().UUID()
if err != nil {
return err
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
return retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
if err != nil {
return err
}
defer conn.Close()
_, err = client.MarkServerAsWiped(ctx, &api.MarkServerAsWipedRequest{Uuid: uuid.String()})
if err != nil {
return retry.ExpectedError(err)
}
c := api.NewAgentClient(conn)
_, err = c.MarkServerAsWiped(ctx, &api.MarkServerAsWipedRequest{Uuid: uuid.String()})
if err != nil {
return err
}
return nil
return nil
})
}
func reconcileIPs(endpoint string, s *smbios.Smbios, ips []net.IP) error {
func reconcileIPs(ctx context.Context, client api.AgentClient, s *smbios.Smbios, ips []net.IP) error {
uuid, err := s.SystemInformation().UUID()
if err != nil {
return err
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
if err != nil {
return err
}
defer conn.Close()
c := api.NewAgentClient(conn)
addresses := make([]*api.Address, len(ips))
for i := range addresses {
addresses[i] = &api.Address{
@ -167,15 +149,20 @@ func reconcileIPs(endpoint string, s *smbios.Smbios, ips []net.IP) error {
}
}
_, err = c.ReconcileServerAddresses(ctx, &api.ReconcileServerAddressesRequest{
Uuid: uuid.String(),
Address: addresses,
})
if err != nil {
return err
}
return retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
return nil
_, err = client.ReconcileServerAddresses(ctx, &api.ReconcileServerAddressesRequest{
Uuid: uuid.String(),
Address: addresses,
})
if err != nil {
return retry.ExpectedError(err)
}
return nil
})
}
func shutdown(err error) {
@ -186,6 +173,12 @@ func shutdown(err error) {
log.Printf("rebooting in %d seconds\n", i)
time.Sleep(1 * time.Second)
}
if unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART) == nil {
select {}
}
os.Exit(1)
}
if unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF) == nil {
@ -196,35 +189,65 @@ func shutdown(err error) {
os.Exit(1)
}
func main() {
func connect(ctx context.Context, endpoint string) (*grpc.ClientConn, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
return grpc.DialContext(ctx, endpoint, grpc.WithInsecure())
}
func mainFunc() error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if err := setup(); err != nil {
shutdown(err)
return err
}
var endpoint string
if found := procfs.ProcCmdline().Get(constants.AgentEndpointArg).First(); found != nil {
endpoint = *found
} else {
shutdown(fmt.Errorf("no endpoint found"))
return fmt.Errorf("no endpoint found")
}
log.Printf("Using %q as API endpoint", endpoint)
conn, err := connect(ctx, endpoint)
if err != nil {
return err
}
defer conn.Close()
client := api.NewAgentClient(conn)
log.Println("Reading SMBIOS")
s, err := smbios.New()
if err != nil {
shutdown(err)
return err
}
resp, err := create(endpoint, s)
createResp, err := create(ctx, client, s)
if err != nil {
shutdown(err)
return err
}
log.Println("Registration complete")
if resp.GetWipe() {
ips, err := talosnet.IPAddrs()
if err != nil {
log.Println("failed to discover IPs")
} else {
if err = reconcileIPs(ctx, client, s, ips); err != nil {
shutdown(err)
}
log.Printf("Reconciled IPs")
}
if createResp.GetWipe() {
bds, err := probe.All()
if err != nil {
shutdown(err)
@ -247,23 +270,16 @@ func main() {
}
}
if err := wipe(endpoint, s); err != nil {
if err := wipe(ctx, client, s); err != nil {
shutdown(err)
}
log.Println("Wipe complete")
}
ips, err := talosnet.IPAddrs()
if err != nil {
log.Println("failed to discover IPs")
} else {
if err := reconcileIPs(endpoint, s, ips); err != nil {
shutdown(err)
}
log.Printf("Reconciled IPs")
}
shutdown(nil)
return nil
}
func main() {
shutdown(mainFunc())
}
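The agent helpers above now share one gRPC connection and wrap every RPC in the same retry shape: constant retries for 5 minutes in 30-second units, a per-attempt timeout, and retry.ExpectedError to mark failures as transient. A minimal sketch of that pattern, factored into a hypothetical withRetry helper (not present in the diff), assuming only the go-retry package already imported by the agent:

import (
	"context"
	"time"

	"github.com/talos-systems/go-retry/retry"
)

// withRetry runs call with a fresh 30-second timeout per attempt, retrying
// transient failures for up to 5 minutes.
func withRetry(ctx context.Context, call func(context.Context) error) error {
	return retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
		attemptCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
		defer cancel()

		if err := call(attemptCtx); err != nil {
			// ExpectedError tells the retryer the error is transient and worth retrying.
			return retry.ExpectedError(err)
		}

		return nil
	})
}

With a helper like this, wipe would reduce to roughly: withRetry(ctx, func(ctx context.Context) error { _, err := client.MarkServerAsWiped(ctx, &api.MarkServerAsWipedRequest{Uuid: uuid.String()}); return err }).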

View File

@ -197,6 +197,49 @@ spec:
- type
type: object
type: array
conditions:
items:
description: Condition defines an observation of a Cluster API resource
operational state.
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status
to another. This should be when the underlying condition changed.
If that is not known, then using the time when the API field
changed is acceptable.
format: date-time
type: string
message:
description: A human readable message indicating details about
the transition. This field may be empty.
type: string
reason:
description: The reason for the condition's last transition
in CamelCase. The specific API may choose whether or not this
field is considered a guaranteed API. This field may not be
empty.
type: string
severity:
description: Severity provides an explicit classification of
Reason code, so the users or machines can immediately understand
the current situation and act accordingly. The Severity field
MUST be set only when Status=False.
type: string
status:
description: Status of the condition, one of True, False, Unknown.
type: string
type:
description: Type of condition in CamelCase or in foo.example.com/CamelCase.
Many .condition.type values are consistent across resources
like Available, but because arbitrary conditions can be useful
(see .node.status.conditions), the ability to deconflict is
important.
type: string
required:
- status
- type
type: object
type: array
inUse:
type: boolean
isClean:

View File

@ -12,6 +12,7 @@ rules:
- events
verbs:
- create
- patch
- apiGroups:
- metal.sidero.dev
resources:

View File

@ -13,6 +13,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/reference"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
@ -31,7 +33,7 @@ type ServerReconciler struct {
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=events,verbs=create
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
ctx := context.Background()
@ -94,7 +96,7 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
}
if !mgmtClient.IsFake() {
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server powered off.")
r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered off.")
}
}
@ -102,6 +104,10 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
case s.Status.InUse && !s.Status.IsClean:
return f(true, false)
case !s.Status.InUse && !s.Status.IsClean:
if conditions.Has(&s, metalv1alpha1.ConditionPowerCycle) && conditions.IsFalse(&s, metalv1alpha1.ConditionPowerCycle) {
return f(false, false)
}
mgmtClient, err := metal.NewManagementClient(&s.Spec)
if err != nil {
log.Error(err, "failed to create management client")
@ -110,7 +116,7 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
return f(false, true)
}
_, err = mgmtClient.IsPoweredOn()
poweredOn, err := mgmtClient.IsPoweredOn()
if err != nil {
log.Error(err, "failed to check power state")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to determine power status: %s.", err))
@ -126,16 +132,32 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
return f(false, true)
}
err = mgmtClient.PowerCycle()
if err != nil {
log.Error(err, "failed to power cycle")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power cycle: %s.", err))
if poweredOn {
err = mgmtClient.PowerCycle()
if err != nil {
log.Error(err, "failed to power cycle")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power cycle: %s.", err))
return f(false, true)
return f(false, true)
}
} else {
err = mgmtClient.PowerOn()
if err != nil {
log.Error(err, "failed to power on")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power on: %s.", err))
return f(false, true)
}
}
if !mgmtClient.IsFake() {
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server power cycled.")
if poweredOn {
r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server power cycled and set to PXE boot once.")
} else {
r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered on and set to PXE boot once.")
}
conditions.MarkFalse(&s, metalv1alpha1.ConditionPowerCycle, "InProgress", clusterv1.ConditionSeverityInfo, "Server power cycled for wiping.")
}
return f(false, false)

View File

@ -23,6 +23,7 @@ import (
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/reference"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
controllerclient "sigs.k8s.io/controller-runtime/pkg/client"
@ -124,6 +125,8 @@ func (s *server) MarkServerAsWiped(ctx context.Context, in *api.MarkServerAsWipe
obj.Status.IsClean = true
conditions.MarkTrue(obj, metalv1alpha1.ConditionPowerCycle)
if err := patchHelper.Patch(ctx, obj); err != nil {
return nil, err
}

go.mod
View File

@ -17,6 +17,7 @@ require (
github.com/talos-systems/cluster-api-control-plane-provider-talos v0.1.0-alpha.4
github.com/talos-systems/go-blockdevice v0.1.0
github.com/talos-systems/go-procfs v0.0.0-20200219015357-57c7311fdd45
github.com/talos-systems/go-retry v0.1.1-0.20200922131245-752f081252cf
github.com/talos-systems/go-smbios v0.0.0-20200219201045-94b8c4e489ee
github.com/talos-systems/net v0.2.0
github.com/talos-systems/talos v0.7.0-alpha.4

View File

@ -69,11 +69,7 @@ func (c *Client) PowerOff() error {
// PowerCycle will power cycle a given machine.
func (c *Client) PowerCycle() error {
if err := c.postRequest("/poweroff"); err != nil {
return err
}
return c.postRequest("/poweron")
return c.postRequest("/reboot")
}
// SetPXE makes sure the node will pxe boot next time.

View File

@ -116,7 +116,9 @@ func TestManagementCluster(ctx context.Context, metalClient client.Client, clust
obj := obj
_, err = dr.Create(ctx, &obj, metav1.CreateOptions{})
_, err = dr.Create(ctx, &obj, metav1.CreateOptions{
FieldManager: "sfyra",
})
if err != nil {
if apierrors.IsAlreadyExists(err) {
_, err = dr.Patch(ctx, obj.GetName(), types.ApplyPatchType, data, metav1.PatchOptions{

View File

@ -0,0 +1,88 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package tests
import (
"context"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/talos-systems/go-retry/retry"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/cluster-api/api/v1alpha3"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// TestMachineDeploymentReconcile verifies that a machine deployment can reconcile deleted machines.
func TestMachineDeploymentReconcile(ctx context.Context, metalClient client.Client) TestFunc {
return func(t *testing.T) {
var machineDeployment v1alpha3.MachineDeployment
const machineDeploymentName = "management-cluster-workers"
err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
require.NoError(t, err)
var machines v1alpha3.MachineList
labelSelector, err := labels.Parse(machineDeployment.Status.Selector)
require.NoError(t, err)
err = metalClient.List(ctx, &machines, client.MatchingLabelsSelector{Selector: labelSelector})
require.NoError(t, err)
replicas := int32(len(machines.Items))
for _, machine := range machines.Items {
machine := machine
err = metalClient.Delete(ctx, &machine)
require.NoError(t, err)
}
// first, controller should pick up the fact that some replicas are missing
err = retry.Constant(1*time.Minute, retry.WithUnits(100*time.Millisecond)).Retry(func() error {
var machineDeployment v1alpha3.MachineDeployment
err = metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
if err != nil {
return retry.UnexpectedError(err)
}
if machineDeployment.Status.UnavailableReplicas != replicas {
return retry.ExpectedError(fmt.Errorf("expected %d unavailable replicas, got %d", replicas, machineDeployment.Status.UnavailableReplicas))
}
return nil
})
require.NoError(t, err)
// next, check that replicas get reconciled
err = retry.Constant(10*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
err = metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
if err != nil {
return retry.UnexpectedError(err)
}
if v1alpha3.MachineDeploymentPhase(machineDeployment.Status.Phase) != v1alpha3.MachineDeploymentPhaseRunning {
return retry.ExpectedError(fmt.Errorf("expected %s phase, got %s", v1alpha3.MachineDeploymentPhaseRunning, machineDeployment.Status.Phase))
}
if machineDeployment.Status.Replicas != replicas {
return retry.ExpectedError(fmt.Errorf("expected %d replicas, got %d", replicas, machineDeployment.Status.Replicas))
}
if machineDeployment.Status.ReadyReplicas != replicas {
return retry.ExpectedError(fmt.Errorf("expected %d ready replicas, got %d", replicas, machineDeployment.Status.ReadyReplicas))
}
return nil
})
require.NoError(t, err)
}
}

View File

@ -13,16 +13,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/talos-systems/go-retry/retry"
"k8s.io/apimachinery/pkg/labels"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/controller-runtime/pkg/client"
metal "github.com/talos-systems/sidero/app/cluster-api-provider-sidero/api/v1alpha3"
sidero "github.com/talos-systems/sidero/app/metal-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/sfyra/pkg/vm"
)
// TestServerReset verifies that all the servers got reset.
func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.Set) TestFunc {
func TestServerReset(ctx context.Context, metalClient client.Client) TestFunc {
return func(t *testing.T) {
var machines metal.MetalMachineList
@ -40,14 +38,11 @@ func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.S
}
serverNamesToCheck = append(serverNamesToCheck, machines.Items[i].Spec.ServerRef.Name)
ownerMachine, err := util.GetOwnerMachine(ctx, metalClient, machines.Items[i].ObjectMeta)
require.NoError(t, err)
err = metalClient.Delete(ctx, ownerMachine)
require.NoError(t, err)
}
err = scaleWorkers(ctx, metalClient, 0)
require.NoError(t, err)
err = retry.Constant(5*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
var servers sidero.ServerList
@ -79,8 +74,6 @@ func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.S
return nil
})
// TODO: Wait for machinedeployment to reconcile deleted machine.
require.NoError(t, err)
}
}

View File

@ -102,6 +102,10 @@ func scaleWorkers(ctx context.Context, metalClient client.Client, replicas int32
return fmt.Errorf("expected %s phase, got %s", v1alpha3.MachineDeploymentPhaseRunning, o.Status.Phase)
}
if o.Status.Replicas != replicas {
return fmt.Errorf("expected %d replicas, got %d", replicas, o.Status.Replicas)
}
if o.Status.ReadyReplicas != replicas {
return fmt.Errorf("expected %d ready replicas, got %d", replicas, o.Status.ReadyReplicas)
}
@ -123,6 +127,8 @@ func scaleWorkers(ctx context.Context, metalClient client.Client, replicas int32
}
func scale(ctx context.Context, metalClient client.Client, name string, obj runtime.Object, set, verify ScaleCallBack) error {
cleanObj := obj.DeepCopyObject()
err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: name}, obj)
if err != nil {
return err
@ -133,12 +139,16 @@ func scale(ctx context.Context, metalClient client.Client, name string, obj runt
return err
}
err = metalClient.Update(ctx, obj)
err = metalClient.Update(ctx, obj, &client.UpdateOptions{
FieldManager: "sfyra",
})
if err != nil {
return err
}
err = retry.Constant(10*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
obj = cleanObj.DeepCopyObject()
err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: name}, obj)
if err != nil {
return retry.UnexpectedError(err)

View File

@ -99,9 +99,13 @@ func Run(ctx context.Context, cluster talos.Cluster, vmSet *vm.Set, capiManager
"TestScaleControlPlaneDown",
TestScaleControlPlaneDown(ctx, metalClient, vmSet),
},
{
"TestMachineDeploymentReconcile",
TestMachineDeploymentReconcile(ctx, metalClient),
},
{
"TestServerReset",
TestServerReset(ctx, metalClient, vmSet),
TestServerReset(ctx, metalClient),
},
}