mirror of https://github.com/siderolabs/sidero.git
fix: proper event patching, powercycle vs. poweron
Fixes #145

This fixes event recording, updates `patchServerInUse` to avoid duplicate updates, and adds a condition to avoid issuing multiple power cycle events. We might want to expand on conditions to report better state to the user.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
parent 4d415c8c76
commit fdd2e44de5
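The core of the change: when an unallocated, dirty server needs to be wiped, the server controller now records a PowerCycle condition and checks it on the next reconcile, so it power cycles a running machine (or simply powers on a machine that is off) only once instead of on every pass. A minimal standalone sketch of that decision flow, using illustrative names rather than the project's actual types:

```go
// Sketch of the powercycle-vs-poweron decision; names are illustrative only.
package main

import "fmt"

type action string

const (
	actionNone       action = "none"        // power cycle already requested, wait for the wipe
	actionPowerCycle action = "power-cycle" // server is running, reboot it into the agent
	actionPowerOn    action = "power-on"    // server is off, just power it on
)

// nextAction decides what to do with a server that is not in use and not clean.
// powerCycleRequested stands in for the PowerCycle condition being present and False.
func nextAction(powerCycleRequested, poweredOn bool) action {
	if powerCycleRequested {
		// a power cycle is already in flight; don't issue another one on every reconcile
		return actionNone
	}

	if poweredOn {
		return actionPowerCycle
	}

	return actionPowerOn
}

func main() {
	fmt.Println(nextAction(false, true))  // power-cycle
	fmt.Println(nextAction(false, false)) // power-on
	fmt.Println(nextAction(true, true))   // none
}
```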
@@ -12,6 +12,7 @@ rules:
   - events
   verbs:
   - create
+  - patch
 - apiGroups:
   - ""
   resources:

@@ -49,7 +49,7 @@ type MetalMachineReconciler struct {
 // +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;
 // +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
-// +kubebuilder:rbac:groups="",resources=events,verbs=create
+// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch

 func (r *MetalMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, err error) {
 	ctx := context.Background()

@@ -211,7 +211,7 @@ func (r *MetalMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, err
 		}

 		if !mgmtClient.IsFake() {
-			r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server powered on.")
+			r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered on.")
 		}
 	}

@@ -260,7 +260,7 @@ func (r *MetalMachineReconciler) reconcileDelete(ctx context.Context, metalMachi
 			return ctrl.Result{}, err
 		}

-		r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", "Server marked as unallocated")
+		r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", "Server marked as unallocated.")
 	}

 	// Machine is deleted so remove the finalizer.

@@ -397,52 +397,36 @@ func (r *MetalMachineReconciler) patchProviderID(ctx context.Context, cluster *c

 // patchServerInUse updates a server to mark it as "in use".
 func (r *MetalMachineReconciler) patchServerInUse(ctx context.Context, serverClass *metalv1alpha1.ServerClass, serverObj *metalv1alpha1.Server, metalMachine *infrav1.MetalMachine) error {
-	ref, err := reference.GetReference(r.Scheme, serverObj)
-	if err != nil {
-		return err
-	}
+	if !serverObj.Status.InUse || serverObj.Status.IsClean {
+		ref, err := reference.GetReference(r.Scheme, serverObj)
+		if err != nil {
+			return err
+		}

-	serverObj.Status.InUse = true
-	serverObj.Status.IsClean = false
+		serverObj.Status.InUse = true
+		serverObj.Status.IsClean = false

-	// nb: we update status and then update the object separately b/c statuses don't seem to get
-	// updated when doing the whole object below.
-	if err := r.Status().Update(ctx, serverObj); err != nil {
-		return err
-	}
+		// nb: we update status and then update the object separately b/c statuses don't seem to get
+		// updated when doing the whole object below.
+		if err := r.Status().Update(ctx, serverObj); err != nil {
+			return err
+		}

-	r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Server marked as allocated for metalMachine %q", metalMachine.Name))
-
-	rollback := func() {
-		// update failed, roll back Status changes
-		serverObj.Status.InUse = false
-
-		r.Status().Update(ctx, serverObj) //nolint: errcheck
+		r.Recorder.Event(ref, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Server marked as allocated for metalMachine %q", metalMachine.Name))
 	}

 	if serverClass != nil {
-		for {
-			serverObj.OwnerReferences = []metav1.OwnerReference{
-				*metav1.NewControllerRef(serverClass, metalv1alpha1.GroupVersion.WithKind("ServerClass")),
-			}
+		patchHelper, err := patch.NewHelper(serverObj, r)
+		if err != nil {
+			return err
+		}

-			err := r.Update(ctx, serverObj)
-			if err == nil {
-				return nil
-			}
+		serverObj.OwnerReferences = []metav1.OwnerReference{
+			*metav1.NewControllerRef(serverClass, metalv1alpha1.GroupVersion.WithKind("ServerClass")),
+		}

-			if !apierrors.IsConflict(err) {
-				rollback()
-
-				return err
-			}
-
-			// conflict happened, retry
-			if err = r.Get(ctx, types.NamespacedName{Namespace: serverObj.Namespace, Name: serverObj.Name}, serverObj); err != nil {
-				rollback()
-
-				return err
-			}
+		if err := patchHelper.Patch(ctx, serverObj); err != nil {
+			return err
+		}
 	}

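The hunk above replaces the hand-rolled update/rollback/conflict loop with a single patch through cluster-api's patch helper, and guards the status update so a server that is already in use is not updated (and its allocation event not emitted) again. For comparison, the stock way to express the old retry-on-conflict idea is client-go's RetryOnConflict; the sketch below uses a ConfigMap as a stand-in object and is not the project's code:

```go
// Sketch: retry-on-conflict with client-go, shown against a stand-in ConfigMap.
package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/util/retry"
)

// markOwned re-reads the object on every attempt, so a conflict caused by a
// concurrent writer is resolved by retrying against the fresh resourceVersion.
func markOwned(ctx context.Context, cs kubernetes.Interface, namespace, name string) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		cm, err := cs.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			return err
		}

		if cm.Labels == nil {
			cm.Labels = map[string]string{}
		}

		cm.Labels["owner"] = "example"

		_, err = cs.CoreV1().ConfigMaps(namespace).Update(ctx, cm, metav1.UpdateOptions{})

		return err
	})
}
```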
@@ -10,6 +10,7 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 )

 // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!

@@ -100,12 +101,16 @@ type ServerSpec struct {
 	Accepted bool `json:"accepted"`
 }

+// ConditionPowerCycle is used to control the powercycle flow.
+const ConditionPowerCycle clusterv1.ConditionType = "PowerCycle"
+
 // ServerStatus defines the observed state of Server.
 type ServerStatus struct {
-	Ready     bool                 `json:"ready"`
-	InUse     bool                 `json:"inUse"`
-	IsClean   bool                 `json:"isClean"`
-	Addresses []corev1.NodeAddress `json:"addresses,omitempty"`
+	Ready      bool                  `json:"ready"`
+	InUse      bool                  `json:"inUse"`
+	IsClean    bool                  `json:"isClean"`
+	Conditions []clusterv1.Condition `json:"conditions,omitempty"`
+	Addresses  []corev1.NodeAddress  `json:"addresses,omitempty"`
 }

 // +kubebuilder:object:root=true

@@ -125,6 +130,14 @@ type Server struct {
 	Status ServerStatus `json:"status,omitempty"`
 }

+func (s *Server) GetConditions() clusterv1.Conditions {
+	return s.Status.Conditions
+}
+
+func (s *Server) SetConditions(conditions clusterv1.Conditions) {
+	s.Status.Conditions = conditions
+}
+
 // +kubebuilder:object:root=true

 // ServerList contains a list of Server.

@@ -11,6 +11,7 @@ package v1alpha1
 import (
 	v1 "k8s.io/api/core/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
+	"sigs.k8s.io/cluster-api/api/v1alpha3"
 )

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

@@ -490,6 +491,13 @@ func (in *ServerSpec) DeepCopy() *ServerSpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ServerStatus) DeepCopyInto(out *ServerStatus) {
 	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1alpha3.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 	if in.Addresses != nil {
 		in, out := &in.Addresses, &out.Addresses
 		*out = make([]v1.NodeAddress, len(*in))

@@ -15,6 +15,7 @@ import (

 	"github.com/talos-systems/go-blockdevice/blockdevice/probe"
 	"github.com/talos-systems/go-procfs/procfs"
+	"github.com/talos-systems/go-retry/retry"
 	"github.com/talos-systems/go-smbios/smbios"
 	talosnet "github.com/talos-systems/net"
 	"golang.org/x/sys/unix"

@@ -69,18 +70,7 @@ func setup() error {
 	return nil
 }

-func create(endpoint string, s *smbios.Smbios) (*api.CreateServerResponse, error) {
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-
-	c := api.NewAgentClient(conn)
-
+func create(ctx context.Context, client api.AgentClient, s *smbios.Smbios) (*api.CreateServerResponse, error) {
 	uuid, err := s.SystemInformation().UUID()
 	if err != nil {
 		return nil, err

@@ -109,56 +99,48 @@ func create(endpoint string, s *smbios.Smbios) (*api.CreateServerResponse, error
 		req.Hostname = hostname
 	}

-	resp, err := c.CreateServer(ctx, req)
-	if err != nil {
-		return nil, err
-	}
+	var resp *api.CreateServerResponse

-	return resp, nil
+	err = retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
+		ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+		defer cancel()
+
+		resp, err = client.CreateServer(ctx, req)
+		if err != nil {
+			return retry.ExpectedError(err)
+		}
+
+		return nil
+	})
+
+	return resp, err
 }

-func wipe(endpoint string, s *smbios.Smbios) error {
+func wipe(ctx context.Context, client api.AgentClient, s *smbios.Smbios) error {
 	uuid, err := s.SystemInformation().UUID()
 	if err != nil {
 		return err
 	}

-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
+	return retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
+		ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+		defer cancel()

-	conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
-	if err != nil {
-		return err
-	}
-	defer conn.Close()
+		_, err = client.MarkServerAsWiped(ctx, &api.MarkServerAsWipedRequest{Uuid: uuid.String()})
+		if err != nil {
+			return retry.ExpectedError(err)
+		}

-	c := api.NewAgentClient(conn)
-
-	_, err = c.MarkServerAsWiped(ctx, &api.MarkServerAsWipedRequest{Uuid: uuid.String()})
-	if err != nil {
-		return err
-	}
-
-	return nil
+		return nil
+	})
 }

-func reconcileIPs(endpoint string, s *smbios.Smbios, ips []net.IP) error {
+func reconcileIPs(ctx context.Context, client api.AgentClient, s *smbios.Smbios, ips []net.IP) error {
 	uuid, err := s.SystemInformation().UUID()
 	if err != nil {
 		return err
 	}

-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	conn, err := grpc.DialContext(ctx, endpoint, grpc.WithInsecure(), grpc.WithBlock())
-	if err != nil {
-		return err
-	}
-	defer conn.Close()
-
-	c := api.NewAgentClient(conn)
-
 	addresses := make([]*api.Address, len(ips))
 	for i := range addresses {
 		addresses[i] = &api.Address{

@@ -167,15 +149,20 @@ func reconcileIPs(endpoint string, s *smbios.Smbios, ips []net.IP) error {
 		}
 	}

-	_, err = c.ReconcileServerAddresses(ctx, &api.ReconcileServerAddressesRequest{
-		Uuid:    uuid.String(),
-		Address: addresses,
-	})
-	if err != nil {
-		return err
-	}
+	return retry.Constant(5*time.Minute, retry.WithUnits(30*time.Second)).Retry(func() error {
+		ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+		defer cancel()

-	return nil
+		_, err = client.ReconcileServerAddresses(ctx, &api.ReconcileServerAddressesRequest{
+			Uuid:    uuid.String(),
+			Address: addresses,
+		})
+		if err != nil {
+			return retry.ExpectedError(err)
+		}
+
+		return nil
+	})
 }

 func shutdown(err error) {

@@ -186,6 +173,12 @@ func shutdown(err error) {
 			log.Printf("rebooting in %d seconds\n", i)
 			time.Sleep(1 * time.Second)
 		}
+
+		if unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART) == nil {
+			select {}
+		}
+
+		os.Exit(1)
 	}

 	if unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF) == nil {

@@ -196,35 +189,65 @@ func shutdown(err error) {
 	os.Exit(1)
 }

-func main() {
+func connect(ctx context.Context, endpoint string) (*grpc.ClientConn, error) {
+	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	return grpc.DialContext(ctx, endpoint, grpc.WithInsecure())
+}
+
+func mainFunc() error {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
 	if err := setup(); err != nil {
-		shutdown(err)
+		return err
 	}

 	var endpoint string
 	if found := procfs.ProcCmdline().Get(constants.AgentEndpointArg).First(); found != nil {
 		endpoint = *found
 	} else {
-		shutdown(fmt.Errorf("no endpoint found"))
+		return fmt.Errorf("no endpoint found")
 	}

 	log.Printf("Using %q as API endpoint", endpoint)
+
+	conn, err := connect(ctx, endpoint)
+	if err != nil {
+		return err
+	}
+
+	defer conn.Close()
+
+	client := api.NewAgentClient(conn)

 	log.Println("Reading SMBIOS")

 	s, err := smbios.New()
 	if err != nil {
-		shutdown(err)
+		return err
 	}

-	resp, err := create(endpoint, s)
+	createResp, err := create(ctx, client, s)
 	if err != nil {
-		shutdown(err)
+		return err
 	}

 	log.Println("Registration complete")

-	if resp.GetWipe() {
+	ips, err := talosnet.IPAddrs()
+	if err != nil {
+		log.Println("failed to discover IPs")
+	} else {
+		if err = reconcileIPs(ctx, client, s, ips); err != nil {
+			shutdown(err)
+		}
+
+		log.Printf("Reconciled IPs")
+	}
+
+	if createResp.GetWipe() {
 		bds, err := probe.All()
 		if err != nil {
 			shutdown(err)

@@ -247,23 +270,16 @@ func main() {
 			}
 		}

-		if err := wipe(endpoint, s); err != nil {
+		if err := wipe(ctx, client, s); err != nil {
 			shutdown(err)
 		}

 		log.Println("Wipe complete")
 	}

-	ips, err := talosnet.IPAddrs()
-	if err != nil {
-		log.Println("failed to discover IPs")
-	} else {
-		if err := reconcileIPs(endpoint, s, ips); err != nil {
-			shutdown(err)
-		}
-
-		log.Printf("Reconciled IPs")
-	}
-
-	shutdown(nil)
+	return nil
 }
+
+func main() {
+	shutdown(mainFunc())
+}

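With this change the agent dials the API once, shares the gRPC client across calls, and wraps each call in go-retry, so a momentarily unreachable metal controller no longer sends the agent straight into a reboot. A small standalone sketch of how those retry.Constant / retry.ExpectedError calls behave; the timings here are made up and much shorter than the agent's five-minute window:

```go
// Sketch of the go-retry pattern used by the new create/wipe/reconcileIPs helpers.
package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/talos-systems/go-retry/retry"
)

func main() {
	attempts := 0

	// Retry every 2 seconds for up to 10 seconds. Errors wrapped with
	// retry.ExpectedError keep the loop going; any other error aborts it.
	err := retry.Constant(10*time.Second, retry.WithUnits(2*time.Second)).Retry(func() error {
		attempts++

		if attempts < 3 {
			return retry.ExpectedError(errors.New("API not reachable yet"))
		}

		return nil
	})

	fmt.Println("attempts:", attempts, "err:", err)
}
```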
@@ -197,6 +197,49 @@ spec:
                 - type
                 type: object
               type: array
+            conditions:
+              items:
+                description: Condition defines an observation of a Cluster API resource
+                  operational state.
+                properties:
+                  lastTransitionTime:
+                    description: Last time the condition transitioned from one status
+                      to another. This should be when the underlying condition changed.
+                      If that is not known, then using the time when the API field
+                      changed is acceptable.
+                    format: date-time
+                    type: string
+                  message:
+                    description: A human readable message indicating details about
+                      the transition. This field may be empty.
+                    type: string
+                  reason:
+                    description: The reason for the condition's last transition
+                      in CamelCase. The specific API may choose whether or not this
+                      field is considered a guaranteed API. This field may not be
+                      empty.
+                    type: string
+                  severity:
+                    description: Severity provides an explicit classification of
+                      Reason code, so the users or machines can immediately understand
+                      the current situation and act accordingly. The Severity field
+                      MUST be set only when Status=False.
+                    type: string
+                  status:
+                    description: Status of the condition, one of True, False, Unknown.
+                    type: string
+                  type:
+                    description: Type of condition in CamelCase or in foo.example.com/CamelCase.
+                      Many .condition.type values are consistent across resources
+                      like Available, but because arbitrary conditions can be useful
+                      (see .node.status.conditions), the ability to deconflict is
+                      important.
+                    type: string
+                required:
+                - status
+                - type
+                type: object
+              type: array
             inUse:
               type: boolean
             isClean:

@@ -12,6 +12,7 @@ rules:
   - events
   verbs:
   - create
+  - patch
 - apiGroups:
   - metal.sidero.dev
   resources:

@@ -13,6 +13,8 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/tools/reference"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
+	"sigs.k8s.io/cluster-api/util/conditions"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller"

@@ -31,7 +33,7 @@ type ServerReconciler struct {

 // +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers/status,verbs=get;update;patch
-// +kubebuilder:rbac:groups="",resources=events,verbs=create
+// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch

 func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	ctx := context.Background()

@@ -94,7 +96,7 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 			}

 			if !mgmtClient.IsFake() {
-				r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server powered off.")
+				r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered off.")
 			}
 		}

@@ -102,6 +104,10 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	case s.Status.InUse && !s.Status.IsClean:
 		return f(true, false)
 	case !s.Status.InUse && !s.Status.IsClean:
+		if conditions.Has(&s, metalv1alpha1.ConditionPowerCycle) && conditions.IsFalse(&s, metalv1alpha1.ConditionPowerCycle) {
+			return f(false, false)
+		}
+
 		mgmtClient, err := metal.NewManagementClient(&s.Spec)
 		if err != nil {
 			log.Error(err, "failed to create management client")

@@ -110,7 +116,7 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 			return f(false, true)
 		}

-		_, err = mgmtClient.IsPoweredOn()
+		poweredOn, err := mgmtClient.IsPoweredOn()
 		if err != nil {
 			log.Error(err, "failed to check power state")
 			r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to determine power status: %s.", err))

@@ -126,16 +132,32 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 			return f(false, true)
 		}

-		err = mgmtClient.PowerCycle()
-		if err != nil {
-			log.Error(err, "failed to power cycle")
-			r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power cycle: %s.", err))
+		if poweredOn {
+			err = mgmtClient.PowerCycle()
+			if err != nil {
+				log.Error(err, "failed to power cycle")
+				r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power cycle: %s.", err))

-			return f(false, true)
+				return f(false, true)
+			}
+		} else {
+			err = mgmtClient.PowerOn()
+			if err != nil {
+				log.Error(err, "failed to power on")
+				r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to power on: %s.", err))
+
+				return f(false, true)
+			}
 		}

 		if !mgmtClient.IsFake() {
-			r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", "Server power cycled.")
+			if poweredOn {
+				r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server power cycled and set to PXE boot once.")
+			} else {
+				r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Management", "Server powered on and set to PXE boot once.")
+			}
+
+			conditions.MarkFalse(&s, metalv1alpha1.ConditionPowerCycle, "InProgress", clusterv1.ConditionSeverityInfo, "Server power cycled for wiping.")
 		}

 		return f(false, false)

@@ -23,6 +23,7 @@ import (
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/tools/reference"
+	"sigs.k8s.io/cluster-api/util/conditions"
 	"sigs.k8s.io/cluster-api/util/patch"
 	controllerclient "sigs.k8s.io/controller-runtime/pkg/client"

@@ -124,6 +125,8 @@ func (s *server) MarkServerAsWiped(ctx context.Context, in *api.MarkServerAsWipe

 	obj.Status.IsClean = true

+	conditions.MarkTrue(obj, metalv1alpha1.ConditionPowerCycle)
+
 	if err := patchHelper.Patch(ctx, obj); err != nil {
 		return nil, err
 	}

go.mod
@@ -17,6 +17,7 @@ require (
 	github.com/talos-systems/cluster-api-control-plane-provider-talos v0.1.0-alpha.4
 	github.com/talos-systems/go-blockdevice v0.1.0
 	github.com/talos-systems/go-procfs v0.0.0-20200219015357-57c7311fdd45
+	github.com/talos-systems/go-retry v0.1.1-0.20200922131245-752f081252cf
 	github.com/talos-systems/go-smbios v0.0.0-20200219201045-94b8c4e489ee
 	github.com/talos-systems/net v0.2.0
 	github.com/talos-systems/talos v0.7.0-alpha.4

@@ -69,11 +69,7 @@ func (c *Client) PowerOff() error {

 // PowerCycle will power cycle a given machine.
 func (c *Client) PowerCycle() error {
-	if err := c.postRequest("/poweroff"); err != nil {
-		return err
-	}
-
-	return c.postRequest("/poweron")
+	return c.postRequest("/reboot")
 }

 // SetPXE makes sure the node will pxe boot next time.

@@ -116,7 +116,9 @@ func TestManagementCluster(ctx context.Context, metalClient client.Client, clust

 		obj := obj

-		_, err = dr.Create(ctx, &obj, metav1.CreateOptions{})
+		_, err = dr.Create(ctx, &obj, metav1.CreateOptions{
+			FieldManager: "sfyra",
+		})
 		if err != nil {
 			if apierrors.IsAlreadyExists(err) {
 				_, err = dr.Patch(ctx, obj.GetName(), types.ApplyPatchType, data, metav1.PatchOptions{

sfyra/pkg/tests/reconcile.go (new file)
@@ -0,0 +1,88 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package tests
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"github.com/talos-systems/go-retry/retry"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/cluster-api/api/v1alpha3"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// TestMachineDeploymentReconcile verifies that machine deployment can reconcile delete machines.
+func TestMachineDeploymentReconcile(ctx context.Context, metalClient client.Client) TestFunc {
+	return func(t *testing.T) {
+		var machineDeployment v1alpha3.MachineDeployment
+
+		const machineDeploymentName = "management-cluster-workers"
+
+		err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
+		require.NoError(t, err)
+
+		var machines v1alpha3.MachineList
+
+		labelSelector, err := labels.Parse(machineDeployment.Status.Selector)
+		require.NoError(t, err)
+
+		err = metalClient.List(ctx, &machines, client.MatchingLabelsSelector{Selector: labelSelector})
+		require.NoError(t, err)
+
+		replicas := int32(len(machines.Items))
+
+		for _, machine := range machines.Items {
+			machine := machine
+
+			err = metalClient.Delete(ctx, &machine)
+			require.NoError(t, err)
+		}
+
+		// first, controller should pick up the fact that some replicas are missing
+		err = retry.Constant(1*time.Minute, retry.WithUnits(100*time.Millisecond)).Retry(func() error {
+			var machineDeployment v1alpha3.MachineDeployment
+
+			err = metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
+			if err != nil {
+				return retry.UnexpectedError(err)
+			}
+
+			if machineDeployment.Status.UnavailableReplicas != replicas {
+				return retry.ExpectedError(fmt.Errorf("expected %d unavailable replicas, got %d", replicas, machineDeployment.Status.ReadyReplicas))
+			}
+
+			return nil
+		})
+		require.NoError(t, err)
+
+		// next, check that replicas get reconciled
+		err = retry.Constant(10*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
+			err = metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: machineDeploymentName}, &machineDeployment)
+			if err != nil {
+				return retry.UnexpectedError(err)
+			}
+
+			if v1alpha3.MachineDeploymentPhase(machineDeployment.Status.Phase) != v1alpha3.MachineDeploymentPhaseRunning {
+				return retry.ExpectedError(fmt.Errorf("expected %s phase, got %s", v1alpha3.MachineDeploymentPhaseRunning, machineDeployment.Status.Phase))
+			}
+
+			if machineDeployment.Status.Replicas != replicas {
+				return retry.ExpectedError(fmt.Errorf("expected %d replicas, got %d", replicas, machineDeployment.Status.Replicas))
+			}
+
+			if machineDeployment.Status.ReadyReplicas != replicas {
+				return retry.ExpectedError(fmt.Errorf("expected %d ready replicas, got %d", replicas, machineDeployment.Status.ReadyReplicas))
+			}
+
+			return nil
+		})
+		require.NoError(t, err)
+	}
+}

@@ -13,16 +13,14 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/talos-systems/go-retry/retry"
 	"k8s.io/apimachinery/pkg/labels"
-	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/controller-runtime/pkg/client"

 	metal "github.com/talos-systems/sidero/app/cluster-api-provider-sidero/api/v1alpha3"
 	sidero "github.com/talos-systems/sidero/app/metal-controller-manager/api/v1alpha1"
-	"github.com/talos-systems/sidero/sfyra/pkg/vm"
 )

 // TestServerReset verifies that all the servers got reset.
-func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.Set) TestFunc {
+func TestServerReset(ctx context.Context, metalClient client.Client) TestFunc {
 	return func(t *testing.T) {
 		var machines metal.MetalMachineList

@@ -40,14 +38,11 @@ func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.S
 			}

 			serverNamesToCheck = append(serverNamesToCheck, machines.Items[i].Spec.ServerRef.Name)
-
-			ownerMachine, err := util.GetOwnerMachine(ctx, metalClient, machines.Items[i].ObjectMeta)
-			require.NoError(t, err)
-
-			err = metalClient.Delete(ctx, ownerMachine)
-			require.NoError(t, err)
 		}

+		err = scaleWorkers(ctx, metalClient, 0)
+		require.NoError(t, err)
+
 		err = retry.Constant(5*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
 			var servers sidero.ServerList

@@ -79,8 +74,6 @@ func TestServerReset(ctx context.Context, metalClient client.Client, vmSet *vm.S
 			return nil
 		})

-		// TODO: Wait for machinedeployment to reconcile deleted machine.
-
 		require.NoError(t, err)
 	}
 }

@@ -102,6 +102,10 @@ func scaleWorkers(ctx context.Context, metalClient client.Client, replicas int32
 			return fmt.Errorf("expected %s phase, got %s", v1alpha3.MachineDeploymentPhaseRunning, o.Status.Phase)
 		}

+		if o.Status.Replicas != replicas {
+			return fmt.Errorf("expected %d replicas, got %d", replicas, o.Status.Replicas)
+		}
+
 		if o.Status.ReadyReplicas != replicas {
 			return fmt.Errorf("expected %d ready replicas, got %d", replicas, o.Status.ReadyReplicas)
 		}

@@ -123,6 +127,8 @@ func scaleWorkers(ctx context.Context, metalClient client.Client, replicas int32
 }

 func scale(ctx context.Context, metalClient client.Client, name string, obj runtime.Object, set, verify ScaleCallBack) error {
+	cleanObj := obj.DeepCopyObject()
+
 	err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: name}, obj)
 	if err != nil {
 		return err

@@ -133,12 +139,16 @@ func scale(ctx context.Context, metalClient client.Client, name string, obj runt
 		return err
 	}

-	err = metalClient.Update(ctx, obj)
+	err = metalClient.Update(ctx, obj, &client.UpdateOptions{
+		FieldManager: "sfyra",
+	})
 	if err != nil {
 		return err
 	}

 	err = retry.Constant(10*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
+		obj = cleanObj.DeepCopyObject()
+
 		err := metalClient.Get(ctx, types.NamespacedName{Namespace: "default", Name: name}, obj)
 		if err != nil {
 			return retry.UnexpectedError(err)

@@ -99,9 +99,13 @@ func Run(ctx context.Context, cluster talos.Cluster, vmSet *vm.Set, capiManager
 			"TestScaleControlPlaneDown",
 			TestScaleControlPlaneDown(ctx, metalClient, vmSet),
 		},
+		{
+			"TestMachineDeploymentReconcile",
+			TestMachineDeploymentReconcile(ctx, metalClient),
+		},
 		{
 			"TestServerReset",
-			TestServerReset(ctx, metalClient, vmSet),
+			TestServerReset(ctx, metalClient),
 		},
 	}