diff --git a/client/api/omni/specs/machine_status.go b/client/api/omni/specs/machine_status.go new file mode 100644 index 00000000..caf76523 --- /dev/null +++ b/client/api/omni/specs/machine_status.go @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package specs + +func (spec *MachineStatusSpec) SchematicReady() bool { + return spec.Schematic != nil && !spec.Schematic.InAgentMode && spec.Schematic.Id != "" && spec.Schematic.FullId != "" +} diff --git a/internal/backend/grpc/management.go b/internal/backend/grpc/management.go index c1cce48d..e2fe08d3 100644 --- a/internal/backend/grpc/management.go +++ b/internal/backend/grpc/management.go @@ -708,9 +708,8 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme return nil, status.Error(codes.FailedPrecondition, "machine is not in maintenance mode") } - schematic := machineStatus.TypedSpec().Value.Schematic - if schematic == nil || schematic.FullId == "" { - return nil, status.Error(codes.FailedPrecondition, "machine schematic is not known yet") + if !machineStatus.TypedSpec().Value.SchematicReady() { + return nil, status.Error(codes.FailedPrecondition, "machine schematic is not ready yet") } platform := machineStatus.TypedSpec().Value.GetPlatformMetadata().GetPlatform() @@ -725,7 +724,7 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme installImage := &specs.MachineConfigGenOptionsSpec_InstallImage{ TalosVersion: req.Version, - SchematicId: schematic.FullId, + SchematicId: machineStatus.TypedSpec().Value.Schematic.FullId, SchematicInitialized: true, Platform: platform, SecurityState: securityState, diff --git a/internal/backend/kernelargs/kernelargs.go b/internal/backend/kernelargs/kernelargs.go index d3edb5cb..648b805b 100644 --- a/internal/backend/kernelargs/kernelargs.go +++ b/internal/backend/kernelargs/kernelargs.go @@ -103,8 +103,7 @@ func UpdateSupported(machineStatus *omni.MachineStatus, getClusterMachineConfig } func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) (args []string, initialized bool, err error) { - schematicConfig := machineStatus.TypedSpec().Value.Schematic - if schematicConfig == nil { + if !machineStatus.TypedSpec().Value.SchematicReady() { return nil, false, nil } @@ -118,7 +117,7 @@ func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) ( extraArgs = kernelArgs.TypedSpec().Value.Args } - baseArgs := xslices.Filter(schematicConfig.KernelArgs, isProtected) + baseArgs := xslices.Filter(machineStatus.TypedSpec().Value.Schematic.KernelArgs, isProtected) return slices.Concat(baseArgs, extraArgs), true, nil } diff --git a/internal/backend/runtime/omni/controllers/omni/infra_machine.go b/internal/backend/runtime/omni/controllers/omni/infra_machine.go index bcecb889..2ba3cd75 100644 --- a/internal/backend/runtime/omni/controllers/omni/infra_machine.go +++ b/internal/backend/runtime/omni/controllers/omni/infra_machine.go @@ -74,11 +74,6 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings { Type: omni.InfraMachineConfigType, Kind: controller.InputQMapped, }, - { - Namespace: resources.DefaultNamespace, - Type: omni.SchematicConfigurationType, - Kind: controller.InputQMapped, - }, { Namespace: resources.DefaultNamespace, Type: omni.MachineExtensionsType, @@ -104,6 +99,11 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings { Type: infra.ProviderType, Kind: controller.InputQMapped, }, + { + Namespace: resources.DefaultNamespace, + Type: omni.ClusterType, + Kind: controller.InputQMapped, + }, }, Outputs: []controller.Output{ { @@ -127,7 +127,7 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings { } // Reconcile implements the controller.QController interface. -func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error { +func (ctrl *InfraMachineController) Reconcile(ctx context.Context, logger *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error { link, err := safe.ReaderGet[*siderolink.Link](ctx, r, ptr) if err != nil { if !state.IsNotFoundError(err) { @@ -143,7 +143,7 @@ func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger return ctrl.reconcileTearingDown(ctx, r, link) } - return ctrl.reconcileRunning(ctx, r, link) + return ctrl.reconcileRunning(ctx, r, link, logger) } func (ctrl *InfraMachineController) reconcileTearingDown(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error { @@ -214,7 +214,7 @@ func (ctrl *InfraMachineController) handleInfraProviderDeletion(ctx context.Cont }, controller.WithExpectedPhaseAny()) } -func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error { +func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link, logger *zap.Logger) error { config, err := safe.ReaderGetByID[*omni.InfraMachineConfig](ctx, r, link.Metadata().ID()) if err != nil && !state.IsNotFoundError(err) { return err @@ -260,6 +260,7 @@ func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r cont machineInfoCollected: machineInfoCollected, providerID: providerID, controllerName: ctrl.Name(), + logger: logger, } return safe.WriterModify[*infra.Machine](ctx, r, infra.NewMachine(link.Metadata().ID()), func(res *infra.Machine) error { @@ -273,6 +274,7 @@ type infraMachineControllerHelper struct { machineExts *omni.MachineExtensions link *siderolink.Link nodeUniqueToken *siderolink.NodeUniqueToken + logger *zap.Logger providerID string controllerName string machineInfoCollected bool @@ -333,16 +335,6 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac return err } - schematicConfig, err := safe.ReaderGetByID[*omni.SchematicConfiguration](ctx, helper.runtime, helper.link.Metadata().ID()) - if err != nil { - if state.IsNotFoundError(err) { - // the schema configuration is not created yet, skip the cluster information collection - return nil - } - - return err - } - var extensions []string if helper.machineExts != nil { @@ -351,12 +343,37 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac // set the cluster allocation information - infraMachine.TypedSpec().Value.ClusterTalosVersion = schematicConfig.TypedSpec().Value.TalosVersion + clusterTalosVersion, err := helper.getClusterTalosVersion(ctx, helper.runtime, clusterMachine) + if err != nil { + return err + } + + infraMachine.TypedSpec().Value.ClusterTalosVersion = clusterTalosVersion infraMachine.TypedSpec().Value.Extensions = extensions return nil } +func (helper *infraMachineControllerHelper) getClusterTalosVersion(ctx context.Context, r controller.Reader, clusterMachine *omni.ClusterMachine) (string, error) { + clusterID, ok := clusterMachine.Metadata().Labels().Get(omni.LabelCluster) + if !ok { + return "", fmt.Errorf("cluster machine %q is missing cluster label", clusterMachine.Metadata().ID()) + } + + cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, r, clusterID) + if err != nil { + if state.IsNotFoundError(err) { + helper.logger.Info("cluster not yet found for cluster machine", zap.String("cluster_machine_id", clusterMachine.Metadata().ID()), zap.String("cluster_id", clusterID)) + + return "", nil + } + + return "", fmt.Errorf("failed to get cluster %q: %w", clusterID, err) + } + + return cluster.TypedSpec().Value.TalosVersion, nil +} + // MapInput implements the controller.QController interface. func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger, runtime controller.QRuntime, ptr controller.ReducedResourceMetadata) ([]resource.Pointer, error) { switch ptr.Type() { @@ -369,6 +386,18 @@ func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger, omni.ClusterMachineType, omni.MachineStatusType: return []resource.Pointer{siderolink.NewLink(ptr.ID(), nil).Metadata()}, nil + case omni.ClusterType: + clusterMachineList, err := safe.ReaderListAll[*omni.ClusterMachine](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, ptr.ID()))) + if err != nil { + return nil, err + } + + linkPtrs := make([]resource.Pointer, 0, clusterMachineList.Len()) + for clusterMachine := range clusterMachineList.All() { + linkPtrs = append(linkPtrs, siderolink.NewLink(clusterMachine.Metadata().ID(), nil).Metadata()) + } + + return linkPtrs, nil case infra.InfraProviderStatusType, infra.ProviderType: linkList, err := safe.ReaderListAll[*siderolink.Link](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelInfraProviderID, ptr.ID()))) if err != nil { diff --git a/internal/backend/runtime/omni/controllers/omni/infra_machine_test.go b/internal/backend/runtime/omni/controllers/omni/infra_machine_test.go index 5494463b..db5ed868 100644 --- a/internal/backend/runtime/omni/controllers/omni/infra_machine_test.go +++ b/internal/backend/runtime/omni/controllers/omni/infra_machine_test.go @@ -89,12 +89,11 @@ func (suite *InfraMachineControllerSuite) TestReconcile() { suite.Require().NoError(suite.state.Create(suite.ctx, clusterMachine)) - // create schematic configuration - schematicConfig := omni.NewSchematicConfiguration("machine-1") + // create cluster with talos version + cluster := omni.NewCluster("test-cluster") + cluster.TypedSpec().Value.TalosVersion = "v42.0.0" - schematicConfig.TypedSpec().Value.TalosVersion = "v42.0.0" - - suite.Require().NoError(suite.state.Create(suite.ctx, schematicConfig)) + suite.Require().NoError(suite.state.Create(suite.ctx, cluster)) // assert that the cluster machine has the correct talos version assertResource[*infra.Machine](&suite.OmniSuite, clusterMachine.Metadata(), func(r *infra.Machine, assertion *assert.Assertions) { diff --git a/internal/backend/runtime/omni/controllers/omni/internal/talos/schematic.go b/internal/backend/runtime/omni/controllers/omni/internal/talos/schematic.go index 29868ff6..0833df1d 100644 --- a/internal/backend/runtime/omni/controllers/omni/internal/talos/schematic.go +++ b/internal/backend/runtime/omni/controllers/omni/internal/talos/schematic.go @@ -11,7 +11,6 @@ import ( "strings" "github.com/cosi-project/runtime/pkg/safe" - "github.com/siderolabs/go-pointer" "github.com/siderolabs/image-factory/pkg/constants" "github.com/siderolabs/image-factory/pkg/schematic" "github.com/siderolabs/talos/pkg/machinery/client" @@ -52,15 +51,12 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, fallbackKernelArgs fullID string rawSchematic = &schematic.Schematic{} manifest string - inAgentMode bool ) - err = items.ForEachErr(func(status *runtime.ExtensionStatus) error { + for status := range items.All() { name := status.TypedSpec().Metadata.Name if name == extensions.MetalAgentExtensionName { - inAgentMode = true - - return nil + return SchematicInfo{InAgentMode: true}, nil } if name == constants.SchematicIDExtensionName { // skip the meta extension @@ -69,14 +65,16 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, fallbackKernelArgs if status.TypedSpec().Metadata.ExtraInfo != "" { manifest = status.TypedSpec().Metadata.ExtraInfo - return yaml.Unmarshal([]byte(manifest), rawSchematic) + if err = yaml.Unmarshal([]byte(manifest), rawSchematic); err != nil { + return SchematicInfo{}, fmt.Errorf("failed to unmarshal schematic manifest: %w", err) + } } - return nil + continue } if name == "modules.dep" { // ignore the virtual extension used for kernel modules dependencies - return nil + continue } if !strings.HasPrefix(name, extensions.OfficialPrefix) { @@ -84,24 +82,6 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, fallbackKernelArgs } exts = append(exts, name) - - return nil - }) - if err != nil { - return SchematicInfo{}, err - } - - if inAgentMode { - id, idErr := pointer.To(schematic.Schematic{}).ID() - if idErr != nil { - return SchematicInfo{}, fmt.Errorf("failed to calculate extensions schematic ID: %w", idErr) - } - - return SchematicInfo{ - ID: id, - FullID: id, - InAgentMode: true, - }, nil } exts = extensions.MapNames(exts) diff --git a/internal/backend/runtime/omni/controllers/omni/kernelargs/status.go b/internal/backend/runtime/omni/controllers/omni/kernelargs/status.go index 40729ead..46bf6ec5 100644 --- a/internal/backend/runtime/omni/controllers/omni/kernelargs/status.go +++ b/internal/backend/runtime/omni/controllers/omni/kernelargs/status.go @@ -40,8 +40,8 @@ func NewStatusController() *StatusController { status.TypedSpec().Value.UnmetConditions = nil status.TypedSpec().Value.CurrentCmdline = ms.TypedSpec().Value.KernelCmdline - if ms.TypedSpec().Value.Schematic == nil { - status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet known") + if !ms.TypedSpec().Value.SchematicReady() { + status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet ready") } else { status.TypedSpec().Value.CurrentArgs = kernelargs.FilterExtras(ms.TypedSpec().Value.Schematic.KernelArgs) } diff --git a/internal/backend/runtime/omni/controllers/omni/kernelargs/status_test.go b/internal/backend/runtime/omni/controllers/omni/kernelargs/status_test.go index 59bdc9b8..9cdc8d7f 100644 --- a/internal/backend/runtime/omni/controllers/omni/kernelargs/status_test.go +++ b/internal/backend/runtime/omni/controllers/omni/kernelargs/status_test.go @@ -38,7 +38,7 @@ func TestReconcile(t *testing.T) { rtestutils.AssertResource(ctx, t, testContext.State, id, func(res *omni.KernelArgsStatus, assertion *assert.Assertions) { assertion.Equal([]string{ - "Schematic information is not yet known", + "Schematic information is not yet ready", "Talos is not installed, kernel args cannot be updated yet", "Cannot determine if kernel args update is supported: SecurityState and TalosVersion are not yet set", }, res.TypedSpec().Value.UnmetConditions) @@ -59,6 +59,8 @@ func TestReconcile(t *testing.T) { } res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ + Id: "test-id", + FullId: "test-full-id", KernelArgs: []string{"arg-1", "arg-2"}, } diff --git a/internal/backend/runtime/omni/controllers/omni/machine_config_gen_options.go b/internal/backend/runtime/omni/controllers/omni/machine_config_gen_options.go index 27e0991e..541b692e 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_config_gen_options.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_config_gen_options.go @@ -69,7 +69,7 @@ func GenInstallConfig(machineStatus *omni.MachineStatus, clusterMachineTalosVers genOptions.TypedSpec().Value.InstallImage.SchematicId = clusterMachineTalosVersion.TypedSpec().Value.SchematicId genOptions.TypedSpec().Value.InstallImage.TalosVersion = clusterMachineTalosVersion.TypedSpec().Value.TalosVersion - genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.Schematic != nil + genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.SchematicReady() if genOptions.TypedSpec().Value.InstallImage.SchematicInitialized { genOptions.TypedSpec().Value.InstallImage.SchematicInvalid = machineStatus.TypedSpec().Value.GetSchematic().GetInvalid() diff --git a/internal/backend/runtime/omni/controllers/omni/machine_status.go b/internal/backend/runtime/omni/controllers/omni/machine_status.go index 3badbafc..a0fe7417 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_status.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_status.go @@ -482,57 +482,8 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c } if event.Schematic != nil { - if spec.Schematic == nil { - spec.Schematic = &specs.MachineStatusSpec_Schematic{} - } - - spec.Schematic.Raw = event.Schematic.Raw - spec.Schematic.Id = event.Schematic.ID - spec.Schematic.FullId = event.Schematic.FullID - spec.Schematic.Extensions = event.Schematic.Extensions - spec.Schematic.Invalid = event.Schematic.Invalid - spec.Schematic.KernelArgs = event.Schematic.KernelArgs - - spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue { - return &specs.MetaValue{ - Key: uint32(value.Key), - Value: value.Value, - } - }) - - if event.Schematic.Overlay.Name != "" { - spec.Schematic.Overlay = &specs.Overlay{ - Name: event.Schematic.Overlay.Name, - Image: event.Schematic.Overlay.Image, - } - } - - if spec.Schematic.InitialSchematic == "" { - spec.Schematic.InitialSchematic = spec.Schematic.FullId - } - - spec.Schematic.InAgentMode = event.Schematic.InAgentMode - - // We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable. - if spec.Schematic.InitialState == nil { - spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{ - Extensions: event.Schematic.Extensions, - } - } - - // if the schematic is invalid or the machine is in agent mode, we reset the initial schematic information - if spec.Schematic.Invalid || spec.Schematic.InAgentMode { - spec.Schematic.InitialSchematic = "" - spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized - } - - _, kernelArgsInitialized := m.Metadata().Annotations().Get(omni.KernelArgsInitialized) - if !kernelArgsInitialized { - if err := ctrl.kernelArgsInitializer.Init(ctx, m.Metadata().ID(), event.Schematic.KernelArgs); err != nil { - return fmt.Errorf("failed to initialize extra kernel args: %w", err) - } - - m.Metadata().Annotations().Set(omni.KernelArgsInitialized, "") + if err := ctrl.handleEventSchematic(ctx, m, event); err != nil { + return fmt.Errorf("failed to handle schematic event: %w", err) } } @@ -565,6 +516,74 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c return nil } +func (ctrl *MachineStatusController) handleEventSchematic(ctx context.Context, ms *omni.MachineStatus, event machinetask.Info) error { + spec := ms.TypedSpec().Value + + // If the machine is in agent mode, reset the schematic information as it were never observed. + // At that stage, anything on it (extensions, kernel args, etc.) is irrelevant and even wrong for Omni to process. + if event.Schematic.InAgentMode { + spec.Schematic = &specs.MachineStatusSpec_Schematic{ + InAgentMode: true, + } + + return nil + } + + if spec.Schematic == nil { + spec.Schematic = &specs.MachineStatusSpec_Schematic{} + } + + spec.Schematic.Raw = event.Schematic.Raw + spec.Schematic.Id = event.Schematic.ID + spec.Schematic.FullId = event.Schematic.FullID + spec.Schematic.Extensions = event.Schematic.Extensions + spec.Schematic.Invalid = event.Schematic.Invalid + spec.Schematic.KernelArgs = event.Schematic.KernelArgs + spec.Schematic.InAgentMode = event.Schematic.InAgentMode + + spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue { + return &specs.MetaValue{ + Key: uint32(value.Key), + Value: value.Value, + } + }) + + if event.Schematic.Overlay.Name != "" { + spec.Schematic.Overlay = &specs.Overlay{ + Name: event.Schematic.Overlay.Name, + Image: event.Schematic.Overlay.Image, + } + } + + if spec.Schematic.InitialSchematic == "" { + spec.Schematic.InitialSchematic = spec.Schematic.FullId + } + + // We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable. + if spec.Schematic.InitialState == nil { + spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{ + Extensions: event.Schematic.Extensions, + } + } + + // If the schematic is invalid, reset the initial schematic information. + if spec.Schematic.Invalid { + spec.Schematic.InitialSchematic = "" + spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized + } + + _, kernelArgsInitialized := ms.Metadata().Annotations().Get(omni.KernelArgsInitialized) + if !kernelArgsInitialized { + if err := ctrl.kernelArgsInitializer.Init(ctx, ms.Metadata().ID(), event.Schematic.KernelArgs); err != nil { + return fmt.Errorf("failed to initialize extra kernel args: %w", err) + } + + ms.Metadata().Annotations().Set(omni.KernelArgsInitialized, "") + } + + return nil +} + // reconcileLabels is a wrapper around omni.MachineStatusReconcileLabels, but it overrides the "installed" label // based on a matching infra.MachineStatus for that machine. // diff --git a/internal/backend/runtime/omni/controllers/omni/machine_status_test.go b/internal/backend/runtime/omni/controllers/omni/machine_status_test.go index 0bccbc1a..ec77b68c 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_status_test.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_status_test.go @@ -440,11 +440,7 @@ func (suite *MachineStatusSuite) TestMachineSchematic() { }, }, expected: &specs.MachineStatusSpec_Schematic{ - Id: defaultSchematic, - InitialSchematic: "", - FullId: defaultSchematic, - InAgentMode: true, - InitialState: &specs.MachineStatusSpec_Schematic_InitialState{}, + InAgentMode: true, }, }, } { diff --git a/internal/backend/runtime/omni/controllers/omni/machineconfig/reconciliation_context.go b/internal/backend/runtime/omni/controllers/omni/machineconfig/reconciliation_context.go index 34c0da43..4ab009e0 100644 --- a/internal/backend/runtime/omni/controllers/omni/machineconfig/reconciliation_context.go +++ b/internal/backend/runtime/omni/controllers/omni/machineconfig/reconciliation_context.go @@ -69,15 +69,11 @@ func checkClusterReady(ctx context.Context, r controller.Reader, machineConfig * func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus *omni.MachineStatus) error { if !machineStatus.TypedSpec().Value.Connected { - return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine is not connected", machineStatus.Metadata().ID()) + return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q is not connected", machineStatus.Metadata().ID()) } - if machineStatus.TypedSpec().Value.Schematic == nil { - return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' does not have schematic information", machineStatus.Metadata().ID())) - } - - if machineStatus.TypedSpec().Value.Schematic.InAgentMode { - return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' schematic is in agent mode", machineStatus.Metadata().ID())) + if !machineStatus.TypedSpec().Value.SchematicReady() { + return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q schematic is not ready", machineStatus.Metadata().ID()) } // if the machine is managed by a static infra provider, we need to ensure that the infra machine is ready to use @@ -90,7 +86,7 @@ func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine is managed by a static infra provider but the infra machine status is not found: %w", err) } - return fmt.Errorf("failed to get infra machine status '%s': %w", machineStatus.Metadata().ID(), err) + return fmt.Errorf("failed to get infra machine status %q: %w", machineStatus.Metadata().ID(), err) } if !infraMachineStatus.TypedSpec().Value.ReadyToUse { @@ -130,7 +126,7 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader, machineSetNode, err := safe.ReaderGetByID[*omni.MachineSetNode](ctx, r, machineConfig.Metadata().ID()) if err != nil { if state.IsNotFoundError(err) { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine set node not found: %w", machineConfig.Metadata().ID(), err) + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine set node not found: %w", machineConfig.Metadata().ID(), err) } return nil, err @@ -172,19 +168,19 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader, ) if err != nil { if state.IsNotFoundError(err) { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status snapshot not found: %w", machineConfig.Metadata().ID(), err) + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status snapshot not found: %w", machineConfig.Metadata().ID(), err) } - return nil, fmt.Errorf("failed to get machine status snapshot '%s': %w", machineConfig.Metadata().ID(), err) + return nil, fmt.Errorf("failed to get machine status snapshot %q: %w", machineConfig.Metadata().ID(), err) } rc.machineStatus, err = safe.ReaderGetByID[*omni.MachineStatus](ctx, r, machineConfig.Metadata().ID()) if err != nil { if state.IsNotFoundError(err) { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status not found: %w", machineConfig.Metadata().ID(), err) + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status not found: %w", machineConfig.Metadata().ID(), err) } - return nil, fmt.Errorf("failed to get machine status '%s': %w", machineConfig.Metadata().ID(), err) + return nil, fmt.Errorf("failed to get machine status %q: %w", machineConfig.Metadata().ID(), err) } if err = checkMachineStatus(ctx, r, rc.machineStatus); err != nil { @@ -194,10 +190,10 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader, genOptions, err := safe.ReaderGetByID[*omni.MachineConfigGenOptions](ctx, r, machineConfig.Metadata().ID()) if err != nil { if state.IsNotFoundError(err) { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine config gen options not found: %w", machineConfig.Metadata().ID(), err) + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine config gen options not found: %w", machineConfig.Metadata().ID(), err) } - return nil, fmt.Errorf("failed to get install image '%s': %w", machineConfig.Metadata().ID(), err) + return nil, fmt.Errorf("failed to get install image %q: %w", machineConfig.Metadata().ID(), err) } rc.installImage = genOptions.TypedSpec().Value.InstallImage diff --git a/internal/backend/runtime/omni/controllers/omni/machineconfig/status.go b/internal/backend/runtime/omni/controllers/omni/machineconfig/status.go index c602633b..5b9f8f9a 100644 --- a/internal/backend/runtime/omni/controllers/omni/machineconfig/status.go +++ b/internal/backend/runtime/omni/controllers/omni/machineconfig/status.go @@ -771,7 +771,7 @@ func (ctrl *ClusterMachineConfigStatusController) shouldResetGraceful( machineSetStatus.TypedSpec().Value.Phase != specs.MachineSetPhase_Destroying, nil } -func (h *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error { +func (ctrl *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error { _, err := c.EtcdForfeitLeadership(ctx, &machineapi.EtcdForfeitLeadershipRequest{}) if err != nil { return fmt.Errorf("failed to forfeit leadership, node %q: %w", id, err) diff --git a/internal/backend/runtime/omni/controllers/omni/machineupgrade/status.go b/internal/backend/runtime/omni/controllers/omni/machineupgrade/status.go index 61be3f3e..6e43920a 100644 --- a/internal/backend/runtime/omni/controllers/omni/machineupgrade/status.go +++ b/internal/backend/runtime/omni/controllers/omni/machineupgrade/status.go @@ -69,8 +69,7 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader status.TypedSpec().Value.IsMaintenance = ms.TypedSpec().Value.Maintenance - schematicSpec := ms.TypedSpec().Value.Schematic - if schematicSpec == nil { + if !ms.TypedSpec().Value.SchematicReady() { status.TypedSpec().Value.Phase = specs.MachineUpgradeStatusSpec_Unknown status.TypedSpec().Value.Status = "schematic info is not available" status.TypedSpec().Value.Error = "" @@ -78,6 +77,8 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader return nil } + schematicSpec := ms.TypedSpec().Value.Schematic + status.TypedSpec().Value.CurrentSchematicId = schematicSpec.FullId if schematicSpec.Raw == "" { diff --git a/internal/backend/runtime/omni/controllers/omni/machineupgrade/status_test.go b/internal/backend/runtime/omni/controllers/omni/machineupgrade/status_test.go index 4d058081..c0f2238f 100644 --- a/internal/backend/runtime/omni/controllers/omni/machineupgrade/status_test.go +++ b/internal/backend/runtime/omni/controllers/omni/machineupgrade/status_test.go @@ -150,6 +150,7 @@ func TestReconcile(t *testing.T) { {Key: 1, Value: "value1"}, {Key: 2, Value: "value2"}, }, + Id: "test-id", FullId: currentSchematicID, Raw: string(currentSchematicRaw), InitialState: &specs.MachineStatusSpec_Schematic_InitialState{ diff --git a/internal/backend/runtime/omni/controllers/omni/omni_test.go b/internal/backend/runtime/omni/controllers/omni/omni_test.go index ed3bd0d8..45a04b80 100644 --- a/internal/backend/runtime/omni/controllers/omni/omni_test.go +++ b/internal/backend/runtime/omni/controllers/omni/omni_test.go @@ -513,6 +513,7 @@ func (suite *OmniSuite) createClusterWithTalosVersion(clusterName string, contro machineStatus.TypedSpec().Value.ManagementAddress = suite.socketConnectionString machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ Id: defaultSchematic, + FullId: defaultSchematic, InitialState: &specs.MachineStatusSpec_Schematic_InitialState{}, } machineStatus.TypedSpec().Value.InitialTalosVersion = cluster.TypedSpec().Value.TalosVersion diff --git a/internal/backend/runtime/omni/controllers/omni/schematic_configuration.go b/internal/backend/runtime/omni/controllers/omni/schematic_configuration.go index 6c4234aa..6832e4c1 100644 --- a/internal/backend/runtime/omni/controllers/omni/schematic_configuration.go +++ b/internal/backend/runtime/omni/controllers/omni/schematic_configuration.go @@ -143,8 +143,8 @@ func (helper *schematicConfigurationHelper) reconcile( ms *omni.MachineStatus, schematicConfiguration *omni.SchematicConfiguration, ) (*omni.MachineExtensionsStatus, error) { - if ms.TypedSpec().Value.Schematic == nil { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic information is not yet available") + if !ms.TypedSpec().Value.SchematicReady() { + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic is not yet ready") } securityState := ms.TypedSpec().Value.SecurityState @@ -368,8 +368,8 @@ func determineKernelArgs(ctx context.Context, machineStatus *omni.MachineStatus, } if !updateSupported { - if machineStatus.TypedSpec().Value.Schematic == nil { - return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet initialized") + if !machineStatus.TypedSpec().Value.SchematicReady() { + return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet ready") } // keep them unchanged - take the current args as-is @@ -437,7 +437,7 @@ func computeMachineExtensionsStatus(ms *omni.MachineStatus, customization *machi requestedExtensions set.Set[string] ) - if ms.TypedSpec().Value.Schematic != nil { + if ms.TypedSpec().Value.SchematicReady() { installedExtensions = xslices.ToSet(ms.TypedSpec().Value.Schematic.Extensions) } diff --git a/internal/backend/runtime/omni/controllers/omni/schematic_configuration_test.go b/internal/backend/runtime/omni/controllers/omni/schematic_configuration_test.go index 9ed2b9d3..90c3c828 100644 --- a/internal/backend/runtime/omni/controllers/omni/schematic_configuration_test.go +++ b/internal/backend/runtime/omni/controllers/omni/schematic_configuration_test.go @@ -72,6 +72,8 @@ func (suite *SchematicConfigurationSuite) TestReconcile() { machineStatus.TypedSpec().Value.TalosVersion = talosVersion machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ + Id: "test-id", + FullId: "test-full-id", Extensions: []string{"siderolabs/hello-world-service"}, InitialSchematic: expectedSchematic, InitialState: &specs.MachineStatusSpec_Schematic_InitialState{ diff --git a/internal/backend/runtime/omni/controllers/omni/talos_upgrade_status.go b/internal/backend/runtime/omni/controllers/omni/talos_upgrade_status.go index dac12121..47c46222 100644 --- a/internal/backend/runtime/omni/controllers/omni/talos_upgrade_status.go +++ b/internal/backend/runtime/omni/controllers/omni/talos_upgrade_status.go @@ -555,8 +555,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu return err } - schematic := machineStatus.TypedSpec().Value.Schematic - if schematic == nil { + if !machineStatus.TypedSpec().Value.SchematicReady() { return nil } @@ -566,7 +565,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu } return safe.WriterModify(ctx, r, clusterMachineTalosVersion, func(res *omni.ClusterMachineTalosVersion) error { - res.TypedSpec().Value.SchematicId = schematic.InitialSchematic + res.TypedSpec().Value.SchematicId = machineStatus.TypedSpec().Value.Schematic.InitialSchematic return nil }) diff --git a/internal/backend/runtime/omni/controllers/testutils/rmock/rmock.go b/internal/backend/runtime/omni/controllers/testutils/rmock/rmock.go index 0cd705b5..71e4f0de 100644 --- a/internal/backend/runtime/omni/controllers/testutils/rmock/rmock.go +++ b/internal/backend/runtime/omni/controllers/testutils/rmock/rmock.go @@ -143,6 +143,7 @@ func init() { res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ Id: defaultSchematic, + FullId: defaultSchematic, InitialState: &specs.MachineStatusSpec_Schematic_InitialState{}, } diff --git a/internal/integration/integration_test.go b/internal/integration/integration_test.go index 978f2daa..0a1d8b21 100644 --- a/internal/integration/integration_test.go +++ b/internal/integration/integration_test.go @@ -356,7 +356,7 @@ func preRunHooks(t *testing.T, options *TestOptions) { func postRunHooks(t *testing.T, options *TestOptions) { if t.Failed() { - t.Logf("there are failed tests, save support bundle for all cluster") + t.Logf("there are failed tests, save support bundle for all clusters") saveAllSupportBundles(t, options.omniClient, options.OutputDir)