// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package controllers

import (
	"context"
	"errors"
	"fmt"

	"github.com/go-logr/logr"
	"github.com/siderolabs/go-pointer"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/tools/reference"
	capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/controllers/remote"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	infrav1 "github.com/siderolabs/sidero/app/caps-controller-manager/api/v1alpha3"
	"github.com/siderolabs/sidero/app/caps-controller-manager/pkg/constants"
	metalv1 "github.com/siderolabs/sidero/app/sidero-controller-manager/api/v1alpha2"
)

var ErrNoServersInServerClass = errors.New("no servers available in serverclass")

// MetalMachineReconciler reconciles a MetalMachine object.
type MetalMachineReconciler struct {
	client.Client
	Log      logr.Logger
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
	Tracker  *remote.ClusterCacheTracker
}

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metalmachines,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metalmachines/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=serverclasses,verbs=get;list;watch;
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=serverclasses/status,verbs=get;list;watch;
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
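
// Reconcile binds a MetalMachine to a Server (referenced directly via
// .Spec.ServerRef or allocated from a ServerClass), copies the ServerBinding
// state into the MetalMachine status, and sets the provider ID on both the
// MetalMachine and the matching workload cluster node.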
func (r *MetalMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, err error) {
	logger := r.Log.WithValues("metalmachine", req.NamespacedName)

	// Fetch the metalMachine instance.
	metalMachine := &infrav1.MetalMachine{}

	err = r.Get(ctx, req.NamespacedName, metalMachine)
	if apierrors.IsNotFound(err) {
		return ctrl.Result{}, nil
	}

	if err != nil {
		return ctrl.Result{}, err
	}

	machine, err := util.GetOwnerMachine(ctx, r.Client, metalMachine.ObjectMeta)
	if err != nil {
		r.Log.Error(err, "Failed to get machine")

		return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
	}

	if machine == nil {
		logger.Info("No ownerref for metalmachine")

		return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
	}

	logger = logger.WithName(fmt.Sprintf("machine=%s", machine.Name))

	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta)
	if err != nil {
		return ctrl.Result{}, fmt.Errorf("no cluster label or cluster does not exist: %w", err)
	}

	logger = logger.WithName(fmt.Sprintf("cluster=%s", cluster.Name))

	if !cluster.Status.InfrastructureReady {
		logger.Info("Cluster infrastructure is not ready", "cluster", cluster.Name)

		return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
	}

	if machine.Spec.Bootstrap.DataSecretName == nil {
		logger.Info("Bootstrap secret is not available yet")

		return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
	}

	// Initialize the patch helper.
	patchHelper, err := patch.NewHelper(metalMachine, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	// Always attempt to Patch the MetalMachine object and status after each reconciliation.
	defer func() {
		if e := patchHelper.Patch(ctx, metalMachine); e != nil {
			logger.Error(e, "failed to patch metalMachine")

			if err == nil {
				err = e
			}
		}
	}()

	// Handle deleted machines.
	if !metalMachine.ObjectMeta.DeletionTimestamp.IsZero() {
		logger.Info("deleting metalmachine")

		return r.reconcileDelete(ctx, metalMachine)
	}

	controllerutil.AddFinalizer(metalMachine, infrav1.MachineFinalizer)

	// TODO (smira):
	// It is really weird that .Spec.ServerRef is used both to set the manual link to the server by the user
	// and as a way to store the current binding when the server is picked up automatically.
	//
	// This should be refactored to keep the _current_ binding ref in .Status, while
	// .Spec should be reserved for a manually chosen server only.
	// This opens the question of what to do if .Status is lost after pivoting: how to reconcile it correctly?

	// If the server binding is missing, we need to pick up a server.
	if metalMachine.Spec.ServerRef == nil {
		if metalMachine.Spec.ServerClassRef == nil {
			return ctrl.Result{}, fmt.Errorf("either a server or serverclass ref must be supplied")
		}

		serverResource, err := r.fetchServerFromClass(ctx, logger, metalMachine.Spec.ServerClassRef, metalMachine)
		if err != nil {
			if errors.Is(err, ErrNoServersInServerClass) {
				return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
			}

			return ctrl.Result{}, err
		}

		metalMachine.Spec.ServerRef = &corev1.ObjectReference{
			Kind: serverResource.Kind,
			Name: serverResource.Name,
		}
	} else {
		// If the server ref is set, it could have been set by "us" or by the user.
		// In any case, check whether we need to create the server binding.
		var serverBinding infrav1.ServerBinding

		err = r.Get(ctx, types.NamespacedName{Namespace: metalMachine.Spec.ServerRef.Namespace, Name: metalMachine.Spec.ServerRef.Name}, &serverBinding)
		if err != nil {
			if apierrors.IsNotFound(err) {
				var serverObj metalv1.Server
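
				// Servers are cluster-scoped, so the lookup deliberately leaves the namespace empty.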
				namespacedName := types.NamespacedName{
					Namespace: "",
					Name:      metalMachine.Spec.ServerRef.Name,
				}

				if err := r.Get(ctx, namespacedName, &serverObj); err != nil {
					return ctrl.Result{}, err
				}

				serverRef, err := reference.GetReference(r.Scheme, &serverObj)
				if err != nil {
					return ctrl.Result{}, err
				}

				if err = r.createServerBinding(ctx, nil, &serverObj, metalMachine); err != nil {
					return ctrl.Result{}, err
				}

				r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Assignment", fmt.Sprintf("Server was assigned via serverRef for metal machine %q.", metalMachine.Name))
			}
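
			// err is non-nil here; when the binding was just created above it is the
			// original NotFound error, and returning it requeues this MetalMachine so
			// the new ServerBinding is picked up on the next pass.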
			return ctrl.Result{}, err
		}
	}

	// Set the providerID, as it's required by upstream CAPI for the machine lifecycle.
	metalMachine.Spec.ProviderID = pointer.To(fmt.Sprintf("%s://%s", constants.ProviderID, metalMachine.Spec.ServerRef.Name))

	// Copy over statuses from ServerBinding to MetalMachine.
	if metalMachine.Spec.ServerRef != nil {
		var serverBinding infrav1.ServerBinding

		err = r.Get(ctx, types.NamespacedName{Namespace: metalMachine.Spec.ServerRef.Namespace, Name: metalMachine.Spec.ServerRef.Name}, &serverBinding)
		if err != nil {
			if apierrors.IsNotFound(err) {
				return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
			}

			return ctrl.Result{}, err
		}

		addresses := make([]capiv1.MachineAddress, 0, len(serverBinding.Spec.Addresses))
		for _, addr := range serverBinding.Spec.Addresses {
			addresses = append(addresses, capiv1.MachineAddress{
				Type:    capiv1.MachineInternalIP,
				Address: addr,
			})
		}

		if serverBinding.Spec.Hostname != "" {
			addresses = append(addresses, capiv1.MachineAddress{
				Type:    capiv1.MachineHostName,
				Address: serverBinding.Spec.Hostname,
			})
		}

		metalMachine.Status.Addresses = addresses
		metalMachine.Status.Ready = true

		// Copy conditions from the server binding.
		for _, condition := range serverBinding.GetConditions() {
			conditions.Set(metalMachine, &condition)
		}
	}

	err = r.patchProviderID(ctx, cluster, metalMachine)
	if err != nil {
		logger.Info("Failed to set provider ID", "error", err)

		conditions.MarkFalse(metalMachine, infrav1.ProviderSetCondition, infrav1.ProviderUpdateFailedReason, capiv1.ConditionSeverityWarning, err.Error())

		return ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter}, nil
	}

	conditions.MarkTrue(metalMachine, infrav1.ProviderSetCondition)

	return ctrl.Result{}, nil
}
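
// reconcileDelete releases the bound Server by deleting its ServerBinding,
// requeueing until the binding is gone, and then removes the finalizer.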
func (r *MetalMachineReconciler) reconcileDelete(ctx context.Context, metalMachine *infrav1.MetalMachine) (ctrl.Result, error) {
	if metalMachine.Spec.ServerRef != nil {
		var serverBinding infrav1.ServerBinding

		err := r.Get(ctx, types.NamespacedName{Namespace: metalMachine.Spec.ServerRef.Namespace, Name: metalMachine.Spec.ServerRef.Name}, &serverBinding)
		if err == nil {
			return ctrl.Result{Requeue: true}, r.Delete(ctx, &serverBinding)
		}

		if !apierrors.IsNotFound(err) {
			return ctrl.Result{}, err
		}
	}

	metalMachine.Spec.ServerRef = nil

	controllerutil.RemoveFinalizer(metalMachine, infrav1.MachineFinalizer)

	return ctrl.Result{}, nil
}
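
// SetupWithManager registers the controller with the manager, indexes
// ServerBindings by their MetalMachine ref, and watches ServerBindings so
// that binding changes requeue the owning MetalMachine.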
func (r *MetalMachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	if err := mgr.GetFieldIndexer().IndexField(ctx, &infrav1.ServerBinding{}, infrav1.ServerBindingMetalMachineRefField, func(rawObj client.Object) []string {
		serverBinding := rawObj.(*infrav1.ServerBinding)

		return []string{serverBinding.Spec.MetalMachineRef.Name}
	}); err != nil {
		return err
	}
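
	// mapRequests translates a ServerBinding event into a reconcile request
	// for the MetalMachine the binding points at.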
	mapRequests := func(ctx context.Context, a client.Object) []reconcile.Request {
		serverBinding := &infrav1.ServerBinding{}

		if err := r.Get(ctx, types.NamespacedName{Namespace: a.GetNamespace(), Name: a.GetName()}, serverBinding); err != nil {
			return nil
		}

		return []reconcile.Request{
			{
				NamespacedName: types.NamespacedName{
					Name:      serverBinding.Spec.MetalMachineRef.Name,
					Namespace: serverBinding.Spec.MetalMachineRef.Namespace,
				},
			},
		}
	}

	return ctrl.NewControllerManagedBy(mgr).
		WithOptions(options).
		For(&infrav1.MetalMachine{}).
		Watches(
			&infrav1.ServerBinding{},
			handler.EnqueueRequestsFromMapFunc(mapRequests),
		).
		Complete(r)
}
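
// fetchServerFromClass returns the Server already bound to this MetalMachine
// if a ServerBinding exists, and otherwise allocates a clean, unused Server
// from the given ServerClass.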
func (r *MetalMachineReconciler) fetchServerFromClass(ctx context.Context, logger logr.Logger, classRef *corev1.ObjectReference, metalMachine *infrav1.MetalMachine) (*metalv1.Server, error) {
	// First, check if there is already an existing serverBinding for this metalMachine.
	var serverBindingList infrav1.ServerBindingList

	if err := r.List(ctx, &serverBindingList, client.MatchingFields(fields.Set{infrav1.ServerBindingMetalMachineRefField: metalMachine.Name})); err != nil {
		return nil, err
	}

	for _, serverBinding := range serverBindingList.Items {
		if serverBinding.Spec.MetalMachineRef.Namespace == metalMachine.Namespace && serverBinding.Spec.MetalMachineRef.Name == metalMachine.Name {
			// Found an existing serverBinding for this metalMachine; the binding shares its name with the Server.
			var server metalv1.Server

			if err := r.Get(ctx, types.NamespacedName{Namespace: serverBinding.Namespace, Name: serverBinding.Name}, &server); err != nil {
				return nil, err
			}

			logger.Info("reconciled missing server ref", "metalmachine", metalMachine.Name, "server", server.Name)

			return &server, nil
		}
	}

	// Grab the server class and validate that we have nodes available.
	serverClassResource, err := r.fetchServerClass(ctx, classRef)
	if err != nil {
		return nil, err
	}

	if len(serverClassResource.Status.ServersAvailable) == 0 {
		return nil, ErrNoServersInServerClass
	}

	serverClassRef, err := reference.GetReference(r.Scheme, serverClassResource)
	if err != nil {
		return nil, err
	}

	// Fetch a server from the available list.
	// NB: we added this loop to double-check that an available server isn't "in use", because
	// we saw raciness between server selection and its removal from the ServersAvailable list.
	for _, availServer := range serverClassResource.Status.ServersAvailable {
		serverObj := &metalv1.Server{}

		namespacedName := types.NamespacedName{
			Namespace: "",
			Name:      availServer,
		}

		if err := r.Get(ctx, namespacedName, serverObj); err != nil {
			return nil, err
		}

		serverRef, err := reference.GetReference(r.Scheme, serverObj)
		if err != nil {
			return nil, err
		}

		if serverObj.Status.InUse {
			continue
		}

		if !serverObj.Status.IsClean {
			continue
		}

		if err := r.createServerBinding(ctx, serverClassRef, serverObj, metalMachine); err != nil {
			// The server we picked was updated by another metalMachine before we finished;
			// move on to the next one.
			if apierrors.IsAlreadyExists(err) {
				continue
			}

			return nil, err
		}

		r.Recorder.Event(serverRef, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Server is allocated via serverclass %q for metal machine %q.", serverClassResource.Name, metalMachine.Name))

		logger.Info("allocated new server", "metalmachine", metalMachine.Name, "server", serverObj.Name, "serverclass", serverClassResource.Name)

		return serverObj, nil
	}

	return nil, ErrNoServersInServerClass
}
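
// patchProviderID mirrors the provider ID onto the workload cluster Node that
// carries the metal.sidero.dev/uuid label matching the bound Server.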
func (r *MetalMachineReconciler) patchProviderID(ctx context.Context, cluster *capiv1.Cluster, metalMachine *infrav1.MetalMachine) error {
	workloadClient, err := r.Tracker.GetClient(ctx, client.ObjectKeyFromObject(cluster))
	if err != nil {
		return err
	}

	var nodes corev1.NodeList

	if err = workloadClient.List(ctx, &nodes, client.MatchingLabels{"metal.sidero.dev/uuid": metalMachine.Spec.ServerRef.Name}); err != nil {
		return err
	}

	if len(nodes.Items) == 0 {
		return fmt.Errorf("no matching nodes found")
	}

	if len(nodes.Items) > 1 {
		return fmt.Errorf("multiple nodes found with same uuid label")
	}

	providerID := fmt.Sprintf("%s://%s", constants.ProviderID, metalMachine.Spec.ServerRef.Name)

	node := nodes.Items[0]

	if node.Spec.ProviderID == providerID {
		return nil
	}

	patchHelper, err := patch.NewHelper(&node, workloadClient)
	if err != nil {
		return err
	}

	r.Log.Info("Setting provider ID", "id", providerID)

	node.Spec.ProviderID = providerID

	return patchHelper.Patch(ctx, &node)
}

// createServerBinding marks a server as "in use" by creating a ServerBinding resource for it.
func (r *MetalMachineReconciler) createServerBinding(ctx context.Context, serverClassRef *corev1.ObjectReference, serverObj *metalv1.Server, metalMachine *infrav1.MetalMachine) error {
	var serverBinding infrav1.ServerBinding

	serverBinding.Namespace = serverObj.Namespace
	serverBinding.Name = serverObj.Name
	serverBinding.Labels = make(map[string]string)
	serverBinding.Spec.MetalMachineRef = corev1.ObjectReference{
		Kind:      metalMachine.Kind,
		UID:       metalMachine.UID,
		Namespace: metalMachine.Namespace,
		Name:      metalMachine.Name,
	}

	serverBinding.Spec.ServerClassRef = serverClassRef.DeepCopy()

	for label, value := range metalMachine.Labels {
		serverBinding.Labels[label] = value
	}

	return r.Create(ctx, &serverBinding)
}
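
// fetchServerClass retrieves the ServerClass referenced by classRef.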
func (r *MetalMachineReconciler) fetchServerClass(ctx context.Context, classRef *corev1.ObjectReference) (*metalv1.ServerClass, error) {
	serverClassResource := &metalv1.ServerClass{}

	namespacedName := types.NamespacedName{
		Namespace: classRef.Namespace,
		Name:      classRef.Name,
	}

	if err := r.Get(ctx, namespacedName, serverClassResource); err != nil {
		return nil, err
	}

	return serverClassResource, nil
}