diff --git a/client/api/omni/specs/omni.pb.go b/client/api/omni/specs/omni.pb.go index 277fcdbe..fac6ac49 100644 --- a/client/api/omni/specs/omni.pb.go +++ b/client/api/omni/specs/omni.pb.go @@ -6678,19 +6678,21 @@ func (x *MachineExtensionsStatusSpec) GetTalosVersion() string { // MachineStatusMetricsSpec provides aggregated state of the number of registered and connected machines for the Omni instance. type MachineStatusMetricsSpec struct { - state protoimpl.MessageState `protogen:"open.v1"` - RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"` - ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"` - AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"` - PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"` - Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` - SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` - UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` - RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"` - RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"` - InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"` + ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"` + AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"` + PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"` + Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` + SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` + UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"` + RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"` + RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"` + InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"` + ApproachingTalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,12,opt,name=approaching_talos_version_end_of_support_machines_count,json=approachingTalosVersionEndOfSupportMachinesCount,proto3" json:"approaching_talos_version_end_of_support_machines_count,omitempty"` + TalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,13,opt,name=talos_version_end_of_support_machines_count,json=talosVersionEndOfSupportMachinesCount,proto3" json:"talos_version_end_of_support_machines_count,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *MachineStatusMetricsSpec) Reset() { @@ -6793,6 +6795,20 @@ func (x *MachineStatusMetricsSpec) GetInvalidSchematicMachinesCount() uint32 { return 0 } +func (x *MachineStatusMetricsSpec) GetApproachingTalosVersionEndOfSupportMachinesCount() uint32 { + if x != nil { + return x.ApproachingTalosVersionEndOfSupportMachinesCount + } + return 0 +} + +func (x *MachineStatusMetricsSpec) GetTalosVersionEndOfSupportMachinesCount() uint32 { + if x != nil { + return x.TalosVersionEndOfSupportMachinesCount + } + return 0 +} + // ClusterMetricsSpec contains metrics about the clusters in the Omni instance. type ClusterMetricsSpec struct { state protoimpl.MessageState `protogen:"open.v1"` @@ -11665,7 +11681,7 @@ const file_omni_specs_omni_proto_rawDesc = "" + "\tInstalled\x10\x00\x12\x0e\n" + "\n" + "Installing\x10\x01\x12\f\n" + - "\bRemoving\x10\x02\"\x8c\a\n" + + "\bRemoving\x10\x02\"\xdb\b\n" + "\x18MachineStatusMetricsSpec\x12:\n" + "\x19registered_machines_count\x18\x01 \x01(\rR\x17registeredMachinesCount\x128\n" + "\x18connected_machines_count\x18\x02 \x01(\rR\x16connectedMachinesCount\x128\n" + @@ -11678,7 +11694,9 @@ const file_omni_specs_omni_proto_rawDesc = "" + "\x19registered_machines_limit\x18\t \x01(\rR\x17registeredMachinesLimit\x12<\n" + "\x1aregistration_limit_reached\x18\n" + " \x01(\bR\x18registrationLimitReached\x12G\n" + - " invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x1a<\n" + + " invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x12q\n" + + "7approaching_talos_version_end_of_support_machines_count\x18\f \x01(\rR0approachingTalosVersionEndOfSupportMachinesCount\x12Z\n" + + "+talos_version_end_of_support_machines_count\x18\r \x01(\rR%talosVersionEndOfSupportMachinesCount\x1a<\n" + "\x0ePlatformsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + "\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\x1aC\n" + diff --git a/client/api/omni/specs/omni.proto b/client/api/omni/specs/omni.proto index 385adf57..de81f3d8 100644 --- a/client/api/omni/specs/omni.proto +++ b/client/api/omni/specs/omni.proto @@ -1356,6 +1356,8 @@ message MachineStatusMetricsSpec { uint32 registered_machines_limit = 9; bool registration_limit_reached = 10; uint32 invalid_schematic_machines_count = 11; + uint32 approaching_talos_version_end_of_support_machines_count = 12; + uint32 talos_version_end_of_support_machines_count = 13; } // ClusterMetricsSpec contains metrics about the clusters in the Omni instance. diff --git a/client/api/omni/specs/omni_vtproto.pb.go b/client/api/omni/specs/omni_vtproto.pb.go index caff128f..3ab6c880 100644 --- a/client/api/omni/specs/omni_vtproto.pb.go +++ b/client/api/omni/specs/omni_vtproto.pb.go @@ -2450,6 +2450,8 @@ func (m *MachineStatusMetricsSpec) CloneVT() *MachineStatusMetricsSpec { r.RegisteredMachinesLimit = m.RegisteredMachinesLimit r.RegistrationLimitReached = m.RegistrationLimitReached r.InvalidSchematicMachinesCount = m.InvalidSchematicMachinesCount + r.ApproachingTalosVersionEndOfSupportMachinesCount = m.ApproachingTalosVersionEndOfSupportMachinesCount + r.TalosVersionEndOfSupportMachinesCount = m.TalosVersionEndOfSupportMachinesCount if rhs := m.Platforms; rhs != nil { tmpContainer := make(map[string]uint32, len(rhs)) for k, v := range rhs { @@ -6626,6 +6628,12 @@ func (this *MachineStatusMetricsSpec) EqualVT(that *MachineStatusMetricsSpec) bo if this.InvalidSchematicMachinesCount != that.InvalidSchematicMachinesCount { return false } + if this.ApproachingTalosVersionEndOfSupportMachinesCount != that.ApproachingTalosVersionEndOfSupportMachinesCount { + return false + } + if this.TalosVersionEndOfSupportMachinesCount != that.TalosVersionEndOfSupportMachinesCount { + return false + } return string(this.unknownFields) == string(that.unknownFields) } @@ -14280,6 +14288,16 @@ func (m *MachineStatusMetricsSpec) MarshalToSizedBufferVT(dAtA []byte) (int, err i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.TalosVersionEndOfSupportMachinesCount != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.TalosVersionEndOfSupportMachinesCount)) + i-- + dAtA[i] = 0x68 + } + if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount)) + i-- + dAtA[i] = 0x60 + } if m.InvalidSchematicMachinesCount != 0 { i = protohelpers.EncodeVarint(dAtA, i, uint64(m.InvalidSchematicMachinesCount)) i-- @@ -18939,6 +18957,12 @@ func (m *MachineStatusMetricsSpec) SizeVT() (n int) { if m.InvalidSchematicMachinesCount != 0 { n += 1 + protohelpers.SizeOfVarint(uint64(m.InvalidSchematicMachinesCount)) } + if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount)) + } + if m.TalosVersionEndOfSupportMachinesCount != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.TalosVersionEndOfSupportMachinesCount)) + } n += len(m.unknownFields) return n } @@ -36756,6 +36780,44 @@ func (m *MachineStatusMetricsSpec) UnmarshalVT(dAtA []byte) error { break } } + case 12: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ApproachingTalosVersionEndOfSupportMachinesCount", wireType) + } + m.ApproachingTalosVersionEndOfSupportMachinesCount = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ApproachingTalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 13: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TalosVersionEndOfSupportMachinesCount", wireType) + } + m.TalosVersionEndOfSupportMachinesCount = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.TalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := protohelpers.Skip(dAtA[iNdEx:]) diff --git a/client/pkg/omni/resources/omni/notification.go b/client/pkg/omni/resources/omni/notification.go index a9b967d2..4a8f3c38 100644 --- a/client/pkg/omni/resources/omni/notification.go +++ b/client/pkg/omni/resources/omni/notification.go @@ -34,6 +34,14 @@ const ( // NotificationNonImageFactoryMachinesID is the ID for the non-ImageFactory machines deprecation notification. // tsgen:NotificationNonImageFactoryMachinesID NotificationNonImageFactoryMachinesID = "non-image-factory-machines" + + // NotificationApproachingTalosVersionEndOfSupportID is the ID for the notification shown when machines are approaching Talos version end of support. + // tsgen:NotificationApproachingTalosVersionEndOfSupportID + NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support" + + // NotificationTalosVersionEndOfSupportID is the ID for the notification shown when machines have reached Talos version end of support. + // tsgen:NotificationTalosVersionEndOfSupportID + NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support" ) // Notification describes a generic notification emitted by a controller. diff --git a/cmd/omni/cmd/cmd.go b/cmd/omni/cmd/cmd.go index 7db88e0d..c26ae6be 100644 --- a/cmd/omni/cmd/cmd.go +++ b/cmd/omni/cmd/cmd.go @@ -164,7 +164,6 @@ func buildRootCommand() (*cobra.Command, error) { defineStorageFlags(rootCmd, rootCmdFlagBinder, flagConfig) defineRegistriesFlags(rootCmdFlagBinder, flagConfig) defineFeatureFlags(rootCmdFlagBinder, flagConfig) - defineNotificationFlags(rootCmdFlagBinder, flagConfig) defineDebugFlags(rootCmdFlagBinder, flagConfig) defineEtcdBackupsFlags(rootCmd, rootCmdFlagBinder, flagConfig) defineEulaFlags(rootCmd, rootCmdFlagBinder, flagConfig) @@ -353,12 +352,6 @@ func defineFeatureFlags(b *FlagBinder, flagConfig *config.Params) { b.BoolVar("features.enableClusterImport", &flagConfig.Features.EnableClusterImport) } -func defineNotificationFlags(b *FlagBinder, flagConfig *config.Params) { - b.BoolVar("notifications.nonImageFactoryDeprecation.enabled", &flagConfig.Notifications.NonImageFactoryDeprecation.Enabled) - b.StringVar("notifications.nonImageFactoryDeprecation.title", &flagConfig.Notifications.NonImageFactoryDeprecation.Title) - b.StringVar("notifications.nonImageFactoryDeprecation.body", &flagConfig.Notifications.NonImageFactoryDeprecation.Body) -} - func defineDebugFlags(b *FlagBinder, flagConfig *config.Params) { b.StringVar("debug.pprof.endpoint", &flagConfig.Debug.Pprof.Endpoint) b.StringVar("debug.server.endpoint", &flagConfig.Debug.Server.Endpoint) diff --git a/frontend/src/api/omni/specs/omni.pb.ts b/frontend/src/api/omni/specs/omni.pb.ts index f1b95ba6..b453ff5c 100644 --- a/frontend/src/api/omni/specs/omni.pb.ts +++ b/frontend/src/api/omni/specs/omni.pb.ts @@ -928,6 +928,8 @@ export type MachineStatusMetricsSpec = { registered_machines_limit?: number registration_limit_reached?: boolean invalid_schematic_machines_count?: number + approaching_talos_version_end_of_support_machines_count?: number + talos_version_end_of_support_machines_count?: number } export type ClusterMetricsSpec = { diff --git a/frontend/src/api/resources.ts b/frontend/src/api/resources.ts index 5dbb78a2..4b3aa130 100644 --- a/frontend/src/api/resources.ts +++ b/frontend/src/api/resources.ts @@ -214,6 +214,8 @@ export const NodeForceDestroyRequestType = "NodeForceDestroyRequests.omni.sidero export const NotificationType = "Notifications.omni.sidero.dev"; export const NotificationMachineRegistrationLimitID = "machine-registration-limit"; export const NotificationNonImageFactoryMachinesID = "non-image-factory-machines"; +export const NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support"; +export const NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support"; export const OngoingTaskType = "OngoingTasks.omni.sidero.dev"; export const RedactedClusterMachineConfigType = "RedactedClusterMachineConfigs.omni.sidero.dev"; export const RotateKubernetesCAType = "RotateKubernetesCAs.omni.sidero.dev"; diff --git a/frontend/src/components/THeader/THeader.stories.ts b/frontend/src/components/THeader/THeader.stories.ts index c40fc412..b5953486 100644 --- a/frontend/src/components/THeader/THeader.stories.ts +++ b/frontend/src/components/THeader/THeader.stories.ts @@ -7,7 +7,7 @@ import { createWatchStreamHandler } from '@msw/helpers' import type { Meta, StoryObj } from '@storybook/vue3-vite' import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb' -import { DefaultNamespace, NotificationType } from '@/api/resources' +import { EphemeralNamespace, NotificationType } from '@/api/resources' import THeader from './THeader.vue' @@ -24,13 +24,13 @@ export const Default: Story = { handlers: [ createWatchStreamHandler({ expectedOptions: { - namespace: DefaultNamespace, + namespace: EphemeralNamespace, type: NotificationType, }, initialResources: faker.helpers.multiple( () => ({ metadata: { - namespace: DefaultNamespace, + namespace: EphemeralNamespace, type: NotificationType, id: faker.string.uuid(), }, diff --git a/frontend/src/components/THeader/THeader.vue b/frontend/src/components/THeader/THeader.vue index 30c3dc46..7ffcaf4b 100644 --- a/frontend/src/components/THeader/THeader.vue +++ b/frontend/src/components/THeader/THeader.vue @@ -10,7 +10,7 @@ import { computed, ref } from 'vue' import { Runtime } from '@/api/common/omni.pb' import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb' -import { DefaultNamespace, NotificationType } from '@/api/resources' +import { EphemeralNamespace, NotificationType } from '@/api/resources' import IconButton from '@/components/Button/IconButton.vue' import TButton from '@/components/Button/TButton.vue' import TIcon, { type IconType } from '@/components/Icon/TIcon.vue' @@ -28,7 +28,7 @@ defineEmits<{ toggleSidebar: [] }>() const { data } = useResourceWatch({ runtime: Runtime.Omni, resource: { - namespace: DefaultNamespace, + namespace: EphemeralNamespace, type: NotificationType, }, }) diff --git a/internal/backend/runtime/omni/controllers/omni/machine_status_metrics.go b/internal/backend/runtime/omni/controllers/omni/machine_status_metrics.go index 7b407fb9..1983887b 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_status_metrics.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_status_metrics.go @@ -10,9 +10,11 @@ import ( "fmt" "iter" "strconv" + "strings" "sync" "time" + "github.com/blang/semver/v4" "github.com/cosi-project/runtime/pkg/controller" "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/safe" @@ -22,6 +24,7 @@ import ( "go.uber.org/zap" "github.com/siderolabs/omni/client/api/omni/specs" + "github.com/siderolabs/omni/client/pkg/constants" "github.com/siderolabs/omni/client/pkg/omni/resources" "github.com/siderolabs/omni/client/pkg/omni/resources/infra" "github.com/siderolabs/omni/client/pkg/omni/resources/omni" @@ -34,18 +37,34 @@ type nodeInfo struct { connected bool } -// NonImageFactoryDeprecationConfig contains configuration for the non-ImageFactory deprecation notification. -type NonImageFactoryDeprecationConfig struct { - Title string - Body string - Enabled bool -} +const ( + // NonImageFactoryNotificationEnabled controls whether a notification is shown for non-ImageFactory machines. + // This will be flipped to true in a future release. + NonImageFactoryNotificationEnabled = false + + // UnsupportedTalosVersionNotificationEnabled controls whether notifications are shown for unsupported Talos versions. + UnsupportedTalosVersionNotificationEnabled = true + + talosVersionSupportPolicyDocsURL = "https://docs.siderolabs.com/omni/getting-started/talos-version-support-policy" + + nonImageFactoryNotificationTitle = "Non-ImageFactory Machines Detected" + nonImageFactoryNotificationBody = "%d machine(s) were provisioned without ImageFactory." + + " Omni will refuse to start with non-ImageFactory machines in a future release." + + " Please re-provision them using ImageFactory." + + approachingTalosEndOfSupportNotificationTitle = "Talos Version Approaching End of Support" + approachingTalosEndOfSupportNotificationBody = "%d machine(s) are running a Talos version that will lose Omni support soon." + + " The minimum supported version is %s. Please upgrade: " + talosVersionSupportPolicyDocsURL + + talosVersionEndOfSupportNotificationTitle = "Unsupported Talos Version Detected" + talosVersionEndOfSupportNotificationBody = "%d machine(s) are running unsupported Talos versions (below %s)." + + " Please upgrade immediately: " + talosVersionSupportPolicyDocsURL +) // NewMachineStatusMetricsController creates a new MachineStatusMetricsController. -func NewMachineStatusMetricsController(maxRegisteredMachines uint32, nonImageFactoryDeprecation NonImageFactoryDeprecationConfig) *MachineStatusMetricsController { +func NewMachineStatusMetricsController(maxRegisteredMachines uint32) *MachineStatusMetricsController { return &MachineStatusMetricsController{ - maxRegisteredMachines: maxRegisteredMachines, - nonImageFactoryDeprecation: nonImageFactoryDeprecation, + maxRegisteredMachines: maxRegisteredMachines, } } @@ -58,19 +77,19 @@ type MachineStatusMetricsController struct { metricsOnce sync.Once - nonImageFactoryDeprecation NonImageFactoryDeprecationConfig - maxRegisteredMachines uint32 platformNames []string - metricNumMachines prometheus.Gauge - metricNumConnectedMachines prometheus.Gauge - metricNumInvalidSchematicMachines prometheus.Gauge - metricNumMachinesPerVersion *prometheus.Desc - metricMachinePlatforms *prometheus.GaugeVec - metricMachineSecureBootStatus *prometheus.GaugeVec - metricMachineUKIStatus *prometheus.GaugeVec + metricNumMachines prometheus.Gauge + metricNumConnectedMachines prometheus.Gauge + metricNumInvalidSchematicMachines prometheus.Gauge + metricNumApproachingTalosVersionEndOfSupportMachines prometheus.Gauge + metricNumTalosVersionEndOfSupportMachines prometheus.Gauge + metricNumMachinesPerVersion *prometheus.Desc + metricMachinePlatforms *prometheus.GaugeVec + metricMachineSecureBootStatus *prometheus.GaugeVec + metricMachineUKIStatus *prometheus.GaugeVec } // Name implements controller.Controller interface. @@ -125,6 +144,16 @@ func (ctrl *MachineStatusMetricsController) initMetrics() { Help: "Number of machines in the instance that were provisioned without using ImageFactory.", }) + ctrl.metricNumApproachingTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "omni_machines_approaching_talos_version_end_of_support", + Help: "Number of machines running a Talos version at or near the minimum supported version.", + }) + + ctrl.metricNumTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "omni_machines_talos_version_end_of_support", + Help: "Number of machines running a Talos version below the minimum supported version.", + }) + ctrl.metricNumMachinesPerVersion = prometheus.NewDesc( "omni_machines_version", "Number of machines in the instance by version.", @@ -198,12 +227,22 @@ func (ctrl *MachineStatusMetricsController) Run(ctx context.Context, r controlle } } - if ctrl.nonImageFactoryDeprecation.Enabled { + if NonImageFactoryNotificationEnabled { if err = ctrl.reconcileNonImageFactoryDeprecationNotification(ctx, r, metricsSpec); err != nil { return err } } + if UnsupportedTalosVersionNotificationEnabled { + if err = ctrl.reconcileApproachingTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil { + return err + } + + if err = ctrl.reconcileTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil { + return err + } + } + select { case <-ctx.Done(): return nil @@ -237,8 +276,8 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN if invalidSchematicCount > 0 { return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationNonImageFactoryMachinesID), func(res *omni.Notification) error { - res.TypedSpec().Value.Title = ctrl.nonImageFactoryDeprecation.Title - res.TypedSpec().Value.Body = fmt.Sprintf(ctrl.nonImageFactoryDeprecation.Body, invalidSchematicCount) + res.TypedSpec().Value.Title = nonImageFactoryNotificationTitle + res.TypedSpec().Value.Body = fmt.Sprintf(nonImageFactoryNotificationBody, invalidSchematicCount) res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING return nil @@ -251,6 +290,44 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN return err } +func (ctrl *MachineStatusMetricsController) reconcileApproachingTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error { + count := int(metricsSpec.ApproachingTalosVersionEndOfSupportMachinesCount) + if count > 0 { + return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationApproachingTalosVersionEndOfSupportID), + func(res *omni.Notification) error { + res.TypedSpec().Value.Title = approachingTalosEndOfSupportNotificationTitle + res.TypedSpec().Value.Body = fmt.Sprintf(approachingTalosEndOfSupportNotificationBody, count, constants.MinTalosVersion) + res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING + + return nil + }, + ) + } + + _, err := helpers.TeardownAndDestroy(ctx, r, omni.NewNotification(omni.NotificationApproachingTalosVersionEndOfSupportID).Metadata()) + + return err +} + +func (ctrl *MachineStatusMetricsController) reconcileTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error { + count := int(metricsSpec.TalosVersionEndOfSupportMachinesCount) + if count > 0 { + return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationTalosVersionEndOfSupportID), + func(res *omni.Notification) error { + res.TypedSpec().Value.Title = talosVersionEndOfSupportNotificationTitle + res.TypedSpec().Value.Body = fmt.Sprintf(talosVersionEndOfSupportNotificationBody, count, constants.MinTalosVersion) + res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING + + return nil + }, + ) + } + + _, err := helpers.TeardownAndDestroy(ctx, r, omni.NewNotification(omni.NotificationTalosVersionEndOfSupportID).Metadata()) + + return err +} + func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omni.MachineStatus], numPendingMachines int) *specs.MachineStatusMetricsSpec { platformMetrics := make(map[string]uint32, len(ctrl.platformNames)) for _, p := range ctrl.platformNames { @@ -270,7 +347,14 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn "false": 0, } - var machines, connectedMachines, allocatedMachines, invalidSchematicMachines int + minTalosVer := semver.MustParse(constants.MinTalosVersion) + // Machines at MinTalosVersion or 1 minor above are "approaching talos end of support" + approachingThreshold := semver.Version{Major: minTalosVer.Major, Minor: minTalosVer.Minor + 2} + + var ( + machines, connectedMachines, allocatedMachines, invalidSchematicMachines int + approachingEndOfSupportMachines, endOfSupportTalosVersionMachines int + ) ctrl.versionsMu.Lock() ctrl.versionsMap = map[nodeInfo]int32{} @@ -282,12 +366,25 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn connectedMachines++ } - if ms.TypedSpec().Value.TalosVersion != "" { + talosVersion := ms.TypedSpec().Value.TalosVersion + + if talosVersion != "" { ctrl.versionsMap[nodeInfo{ - talosVersion: ms.TypedSpec().Value.TalosVersion, + talosVersion: talosVersion, cluster: ms.TypedSpec().Value.Cluster, connected: ms.TypedSpec().Value.Connected, }]++ + + if ver, err := semver.ParseTolerant(strings.TrimLeft(talosVersion, "v")); err == nil { + ver.Pre = nil + + switch { + case ver.LT(minTalosVer): + endOfSupportTalosVersionMachines++ + case ver.LT(approachingThreshold): + approachingEndOfSupportMachines++ + } + } } if ms.TypedSpec().Value.Cluster != "" { @@ -318,6 +415,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn ctrl.metricNumMachines.Set(float64(machines)) ctrl.metricNumConnectedMachines.Set(float64(connectedMachines)) ctrl.metricNumInvalidSchematicMachines.Set(float64(invalidSchematicMachines)) + ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Set(float64(approachingEndOfSupportMachines)) + ctrl.metricNumTalosVersionEndOfSupportMachines.Set(float64(endOfSupportTalosVersionMachines)) for key, num := range platformMetrics { ctrl.metricMachinePlatforms.WithLabelValues(key).Set(float64(num)) @@ -342,6 +441,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn RegisteredMachinesLimit: ctrl.maxRegisteredMachines, RegistrationLimitReached: ctrl.maxRegisteredMachines > 0 && uint32(machines) >= ctrl.maxRegisteredMachines, InvalidSchematicMachinesCount: uint32(invalidSchematicMachines), + ApproachingTalosVersionEndOfSupportMachinesCount: uint32(approachingEndOfSupportMachines), + TalosVersionEndOfSupportMachinesCount: uint32(endOfSupportTalosVersionMachines), } } @@ -365,6 +466,8 @@ func (ctrl *MachineStatusMetricsController) Collect(ch chan<- prometheus.Metric) ctrl.metricNumMachines.Collect(ch) ctrl.metricNumConnectedMachines.Collect(ch) ctrl.metricNumInvalidSchematicMachines.Collect(ch) + ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Collect(ch) + ctrl.metricNumTalosVersionEndOfSupportMachines.Collect(ch) ctrl.metricMachinePlatforms.Collect(ch) ctrl.metricMachineSecureBootStatus.Collect(ch) ctrl.metricMachineUKIStatus.Collect(ch) diff --git a/internal/backend/runtime/omni/controllers/omni/machine_status_metrics_test.go b/internal/backend/runtime/omni/controllers/omni/machine_status_metrics_test.go index 9b762883..52a98a4c 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_status_metrics_test.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_status_metrics_test.go @@ -22,14 +22,6 @@ import ( "github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/testutils" ) -func newNonImageFactoryDeprecationConfig(enabled bool) omnictrl.NonImageFactoryDeprecationConfig { - return omnictrl.NonImageFactoryDeprecationConfig{ - Enabled: enabled, - Title: "Non-ImageFactory Machines Detected", - Body: "%d machine(s) were provisioned without ImageFactory.", - } -} - func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) { t.Parallel() @@ -82,7 +74,7 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) { testutils.WithRuntime(ctx, t, testutils.TestOptions{}, func(_ context.Context, tc testutils.TestContext) { - require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered, omnictrl.NonImageFactoryDeprecationConfig{}))) + require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered))) }, func(ctx context.Context, tc testutils.TestContext) { for _, id := range tt.machineIDs { @@ -113,43 +105,42 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) { } } -func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T) { +func TestMachineStatusMetricsController_UnsupportedTalosVersion(t *testing.T) { t.Parallel() + require.True(t, omnictrl.UnsupportedTalosVersionNotificationEnabled, "this test assumes UnsupportedTalosVersionNotificationEnabled is true") + for _, tt := range []struct { - name string - invalidSchematicIDs []string - validSchematicIDs []string - enabled bool - expectNotification bool - expectCount int + machineVersions map[string]string + name string + expectApproachingCount int + expectEndOfSupportCount int + expectApproaching bool + expectEndOfSupport bool }{ { - name: "disabled, invalid machines present", - invalidSchematicIDs: []string{"m1"}, - enabled: false, - expectNotification: false, + name: "all machines above threshold", + machineVersions: map[string]string{"m1": "v1.10.0", "m2": "v1.11.0"}, }, { - name: "enabled, no invalid machines", - validSchematicIDs: []string{"m1", "m2"}, - enabled: true, - expectNotification: false, + name: "machines approaching end of support", + machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.9.0", "m3": "v1.11.0"}, + expectApproaching: true, + expectApproachingCount: 2, }, { - name: "enabled, some invalid machines", - invalidSchematicIDs: []string{"m1", "m2"}, - validSchematicIDs: []string{"m3"}, - enabled: true, - expectNotification: true, - expectCount: 2, + name: "machines past end of support", + machineVersions: map[string]string{"m1": "v1.7.0", "m2": "v1.6.0", "m3": "v1.11.0"}, + expectEndOfSupport: true, + expectEndOfSupportCount: 2, }, { - name: "enabled, all invalid machines", - invalidSchematicIDs: []string{"m1", "m2", "m3"}, - enabled: true, - expectNotification: true, - expectCount: 3, + name: "mix of approaching and past end of support", + machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.7.0", "m3": "v1.11.0"}, + expectApproaching: true, + expectApproachingCount: 1, + expectEndOfSupport: true, + expectEndOfSupportCount: 1, }, } { t.Run(tt.name, func(t *testing.T) { @@ -160,40 +151,35 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T) testutils.WithRuntime(ctx, t, testutils.TestOptions{}, func(_ context.Context, tc testutils.TestContext) { - require.NoError(t, tc.Runtime.RegisterController( - omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(tt.enabled)), - )) + require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0))) }, func(ctx context.Context, tc testutils.TestContext) { - for _, id := range tt.invalidSchematicIDs { + for id, version := range tt.machineVersions { ms := omni.NewMachineStatus(id) - ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ - Invalid: true, - } + ms.TypedSpec().Value.TalosVersion = version require.NoError(t, tc.State.Create(ctx, ms)) } - for _, id := range tt.validSchematicIDs { - ms := omni.NewMachineStatus(id) - ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ - Id: "valid-id", - FullId: "valid-full-id", - } - - require.NoError(t, tc.State.Create(ctx, ms)) - } - - if tt.expectNotification { - rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) { - a.Equal("Non-ImageFactory Machines Detected", res.TypedSpec().Value.Title) - a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectCount)) + if tt.expectApproaching { + rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) { + a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectApproachingCount)) a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type) }) } else { // Notification should not exist. Sleep briefly since there is no state change to poll on. time.Sleep(500 * time.Millisecond) - rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID) + rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID) + } + + if tt.expectEndOfSupport { + rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) { + a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectEndOfSupportCount)) + a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type) + }) + } else { + time.Sleep(500 * time.Millisecond) + rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID) } }, ) @@ -201,7 +187,7 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T) } } -func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *testing.T) { +func TestMachineStatusMetricsController_UnsupportedTalosVersionTeardown(t *testing.T) { t.Parallel() ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) @@ -209,35 +195,27 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *te testutils.WithRuntime(ctx, t, testutils.TestOptions{}, func(_ context.Context, tc testutils.TestContext) { - require.NoError(t, tc.Runtime.RegisterController( - omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(true)), - )) + require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0))) }, func(ctx context.Context, tc testutils.TestContext) { - // Create a machine with invalid schematic. ms := omni.NewMachineStatus("m1") - ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{Invalid: true} + ms.TypedSpec().Value.TalosVersion = "v1.7.0" require.NoError(t, tc.State.Create(ctx, ms)) - // Wait for the notification to appear. - rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) { + rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) { a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type) }) - // Fix the machine schematic (no longer invalid). _, err := safe.StateUpdateWithConflicts(ctx, tc.State, ms.Metadata(), func(res *omni.MachineStatus) error { - res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{ - Id: "valid-id", - FullId: "valid-full-id", - } + res.TypedSpec().Value.TalosVersion = "v1.11.0" return nil }) require.NoError(t, err) - // Notification should be torn down. - rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID) + rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID) + rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID) }, ) } diff --git a/internal/backend/runtime/omni/omni.go b/internal/backend/runtime/omni/omni.go index 7741a15a..bed982e0 100644 --- a/internal/backend/runtime/omni/omni.go +++ b/internal/backend/runtime/omni/omni.go @@ -172,11 +172,7 @@ func NewRuntime(cfg *config.Params, talosClientFactory *talos.ClientFactory, dns LBConfig: cfg.Services.LoadBalancer, }, omnictrl.NewMachineCleanupController(), - omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines(), omnictrl.NonImageFactoryDeprecationConfig{ - Enabled: cfg.Notifications.NonImageFactoryDeprecation.GetEnabled(), - Title: cfg.Notifications.NonImageFactoryDeprecation.GetTitle(), - Body: cfg.Notifications.NonImageFactoryDeprecation.GetBody(), - }), + omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines()), omnictrl.NewVersionsController(cfg.Registries.GetImageFactoryBaseURL(), cfg.Features.GetEnableTalosPreReleaseVersions(), cfg.Registries.GetKubernetes()), omnictrl.NewClusterLoadBalancerController( cfg.Services.LoadBalancer.GetMinPort(), diff --git a/internal/pkg/config/accessors.generated.go b/internal/pkg/config/accessors.generated.go index 1c33b07f..d927eec6 100644 --- a/internal/pkg/config/accessors.generated.go +++ b/internal/pkg/config/accessors.generated.go @@ -919,39 +919,6 @@ func (s *LogsStripe) SetMinCommit(v uint32) { s.MinCommit = &v } -func (s *NonImageFactoryDeprecation) GetBody() string { - if s == nil || s.Body == nil { - return *new(string) - } - return *s.Body -} - -func (s *NonImageFactoryDeprecation) SetBody(v string) { - s.Body = &v -} - -func (s *NonImageFactoryDeprecation) GetEnabled() bool { - if s == nil || s.Enabled == nil { - return *new(bool) - } - return *s.Enabled -} - -func (s *NonImageFactoryDeprecation) SetEnabled(v bool) { - s.Enabled = &v -} - -func (s *NonImageFactoryDeprecation) GetTitle() string { - if s == nil || s.Title == nil { - return *new(string) - } - return *s.Title -} - -func (s *NonImageFactoryDeprecation) SetTitle(v string) { - s.Title = &v -} - func (s *OIDC) GetAllowUnverifiedEmail() bool { if s == nil || s.AllowUnverifiedEmail == nil { return *new(bool) diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go index b4e126ef..09ca4a64 100644 --- a/internal/pkg/config/config.go +++ b/internal/pkg/config/config.go @@ -30,7 +30,17 @@ import ( "github.com/siderolabs/omni/internal/pkg/jsonschema" ) -const wireguardDefaultPort = "50180" +const ( + wireguardDefaultPort = "50180" + + // UnsupportedTalosVersionFailOnStart controls whether Omni refuses to start when machines + // are running Talos versions below MinTalosVersion. This will be flipped to true in a future release. + UnsupportedTalosVersionFailOnStart = false + + // NonImageFactoryFailOnStart controls whether Omni refuses to start when machines + // were provisioned without ImageFactory. This will be flipped to true in a future release. + NonImageFactoryFailOnStart = false +) //go:embed schema.json var schemaData string @@ -164,6 +174,18 @@ func (p *Params) ValidateState(ctx context.Context, st state.State) error { } } + if UnsupportedTalosVersionFailOnStart { + if err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st); err != nil { + return err + } + } + + if NonImageFactoryFailOnStart { + if err := validations.EnsureNoNonImageFactoryMachines(ctx, st); err != nil { + return err + } + } + return nil } diff --git a/internal/pkg/config/config_test.go b/internal/pkg/config/config_test.go index d3bfaee5..8f117774 100644 --- a/internal/pkg/config/config_test.go +++ b/internal/pkg/config/config_test.go @@ -564,10 +564,4 @@ func TestSchemaDefaults(t *testing.T) { // features assert.True(t, p.Features.GetEnableConfigDataCompression()) assert.True(t, p.Features.GetEnableClusterImport()) - - // notifications.nonImageFactoryDeprecation - assert.False(t, p.Notifications.NonImageFactoryDeprecation.GetEnabled()) - assert.Equal(t, "Non-ImageFactory Machines Detected", p.Notifications.NonImageFactoryDeprecation.GetTitle()) - assert.Equal(t, "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.", - p.Notifications.NonImageFactoryDeprecation.GetBody()) } diff --git a/internal/pkg/config/schema.json b/internal/pkg/config/schema.json index fb296f1a..72453934 100644 --- a/internal/pkg/config/schema.json +++ b/internal/pkg/config/schema.json @@ -13,7 +13,6 @@ "registries", "debug", "features", - "notifications", "eulaAccept" ], "goJSONSchema": { @@ -58,10 +57,6 @@ "description": "Features contains feature flags to enable/disable various Omni features.", "$ref": "#/definitions/Features" }, - "notifications": { - "description": "Notifications contains configuration for system notifications emitted by controllers.", - "$ref": "#/definitions/Notifications" - }, "eulaAccept": { "description": "EulaAccept contains the identity of the person accepting the EULA via CLI flags or config.", "$ref": "#/definitions/EulaAccept" @@ -1729,50 +1724,6 @@ } } }, - "Notifications": { - "type": "object", - "required": [ - "nonImageFactoryDeprecation" - ], - "properties": { - "nonImageFactoryDeprecation": { - "description": "NonImageFactoryDeprecation contains configuration for the notification shown when machines are provisioned without using ImageFactory.", - "$ref": "#/definitions/NonImageFactoryDeprecation" - } - } - }, - "NonImageFactoryDeprecation": { - "type": "object", - "properties": { - "enabled": { - "description": "Enabled controls whether the non-ImageFactory deprecation notification is shown when machines with invalid schematics are detected.", - "x-cli-flag": "non-image-factory-deprecation-enabled", - "type": "boolean", - "default": false, - "goJSONSchema": { - "pointer": true - } - }, - "title": { - "description": "Title is the title of the non-ImageFactory deprecation notification.", - "x-cli-flag": "non-image-factory-deprecation-title", - "type": "string", - "default": "Non-ImageFactory Machines Detected", - "goJSONSchema": { - "pointer": true - } - }, - "body": { - "description": "Body is the body of the non-ImageFactory deprecation notification. Use %d as a placeholder for the number of affected machines.", - "x-cli-flag": "non-image-factory-deprecation-body", - "type": "string", - "default": "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.", - "goJSONSchema": { - "pointer": true - } - } - } - }, "EulaAccept": { "type": "object", "dependentRequired": { diff --git a/internal/pkg/config/types.generated.go b/internal/pkg/config/types.generated.go index 7e24963e..6c9ce1c5 100644 --- a/internal/pkg/config/types.generated.go +++ b/internal/pkg/config/types.generated.go @@ -434,25 +434,6 @@ type LogsStripe struct { MinCommit *uint32 `json:"minCommit,omitempty,omitzero" yaml:"minCommit,omitempty"` } -type NonImageFactoryDeprecation struct { - // Body is the body of the non-ImageFactory deprecation notification. Use %d as a - // placeholder for the number of affected machines. - Body *string `json:"body,omitempty,omitzero" yaml:"body,omitempty"` - - // Enabled controls whether the non-ImageFactory deprecation notification is shown - // when machines with invalid schematics are detected. - Enabled *bool `json:"enabled,omitempty,omitzero" yaml:"enabled,omitempty"` - - // Title is the title of the non-ImageFactory deprecation notification. - Title *string `json:"title,omitempty,omitzero" yaml:"title,omitempty"` -} - -type Notifications struct { - // NonImageFactoryDeprecation contains configuration for the notification shown - // when machines are provisioned without using ImageFactory. - NonImageFactoryDeprecation NonImageFactoryDeprecation `json:"nonImageFactoryDeprecation" yaml:"nonImageFactoryDeprecation"` -} - type OIDC struct { // AllowUnverifiedEmail controls whether users with unverified emails (without // email_verified claim) are allowed to authenticate. @@ -501,10 +482,6 @@ type Params struct { // Logs contains logging-related configuration. Logs Logs `json:"logs" yaml:"logs"` - // Notifications contains configuration for system notifications emitted by - // controllers. - Notifications Notifications `json:"notifications" yaml:"notifications"` - // Registries contains container image registries configuration. Registries Registries `json:"registries" yaml:"registries"` diff --git a/internal/pkg/config/validations/non_image_factory.go b/internal/pkg/config/validations/non_image_factory.go new file mode 100644 index 00000000..ceac6af8 --- /dev/null +++ b/internal/pkg/config/validations/non_image_factory.go @@ -0,0 +1,41 @@ +// Copyright (c) 2026 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +package validations + +import ( + "context" + "fmt" + + "github.com/cosi-project/runtime/pkg/safe" + "github.com/cosi-project/runtime/pkg/state" + "github.com/gertd/go-pluralize" + + "github.com/siderolabs/omni/client/pkg/omni/resources/omni" +) + +// EnsureNoNonImageFactoryMachines checks that no machines have an invalid schematic (provisioned without ImageFactory). +func EnsureNoNonImageFactoryMachines(ctx context.Context, st state.State) error { + statuses, err := safe.ReaderListAll[*omni.MachineStatus](ctx, st) + if err != nil { + return err + } + + var count int + + for status := range statuses.All() { + if status.TypedSpec().Value.SchematicReady() && status.TypedSpec().Value.GetSchematic().GetInvalid() { + count++ + } + } + + if count == 0 { + return nil + } + + return fmt.Errorf("detected %s provisioned without ImageFactory; "+ + "please re-provision them using ImageFactory", + pluralize.NewClient().Pluralize("machine", count, true)) +} diff --git a/internal/pkg/config/validations/unsupported_talos_version.go b/internal/pkg/config/validations/unsupported_talos_version.go new file mode 100644 index 00000000..de9c8ba9 --- /dev/null +++ b/internal/pkg/config/validations/unsupported_talos_version.go @@ -0,0 +1,35 @@ +// Copyright (c) 2026 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +package validations + +import ( + "context" + "fmt" + + "github.com/blang/semver/v4" + "github.com/cosi-project/runtime/pkg/state" + "github.com/gertd/go-pluralize" + + "github.com/siderolabs/omni/client/pkg/constants" +) + +// EnsureNoMachinesBelowMinTalosVersion checks that no machines are running Talos versions below MinTalosVersion. +func EnsureNoMachinesBelowMinTalosVersion(ctx context.Context, st state.State) error { + minVer := semver.MustParse(constants.MinTalosVersion) + + count, err := getMachinesBelowTalosVersion(ctx, st, minVer) + if err != nil { + return err + } + + if count == 0 { + return nil + } + + return fmt.Errorf("detected %s running unsupported Talos versions (below %s); "+ + "please upgrade the machines", + pluralize.NewClient().Pluralize("machine", int(count), true), constants.MinTalosVersion) +} diff --git a/internal/pkg/config/validations/validations_test.go b/internal/pkg/config/validations/validations_test.go new file mode 100644 index 00000000..364c93fa --- /dev/null +++ b/internal/pkg/config/validations/validations_test.go @@ -0,0 +1,153 @@ +// Copyright (c) 2026 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +package validations_test + +import ( + "context" + "testing" + "time" + + "github.com/cosi-project/runtime/pkg/state" + "github.com/cosi-project/runtime/pkg/state/impl/inmem" + "github.com/cosi-project/runtime/pkg/state/impl/namespaced" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/siderolabs/omni/client/api/omni/specs" + "github.com/siderolabs/omni/client/pkg/omni/resources/omni" + "github.com/siderolabs/omni/internal/pkg/config/validations" +) + +func TestEnsureNoMachinesBelowMinTalosVersion(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + versions map[string]string + name string + expectErr bool + }{ + { + name: "no machines", + versions: nil, + }, + { + name: "machine below MinTalosVersion", + versions: map[string]string{"m1": "v1.7.0"}, + expectErr: true, + }, + { + name: "machine at MinTalosVersion", + versions: map[string]string{"m1": "v1.8.0"}, + }, + { + name: "machine above MinTalosVersion", + versions: map[string]string{"m1": "v1.10.0"}, + }, + { + name: "mix of below and above", + versions: map[string]string{"m1": "v1.7.0", "m2": "v1.10.0"}, + expectErr: true, + }, + { + name: "machine with empty version is ignored", + versions: map[string]string{"m1": ""}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) + t.Cleanup(cancel) + + st := state.WrapCore(namespaced.NewState(inmem.Build)) + + for id, version := range tt.versions { + ms := omni.NewMachineStatus(id) + ms.TypedSpec().Value.TalosVersion = version + + require.NoError(t, st.Create(ctx, ms)) + } + + err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st) + if tt.expectErr { + assert.ErrorContains(t, err, "running unsupported Talos versions") + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestEnsureNoNonImageFactoryMachines(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + machines map[string]*specs.MachineStatusSpec_Schematic + name string + expectErr bool + }{ + { + name: "no machines", + machines: nil, + }, + { + name: "valid schematic", + machines: map[string]*specs.MachineStatusSpec_Schematic{ + "m1": {Id: "abc", FullId: "abc123"}, + }, + }, + { + name: "no schematic (not ready)", + machines: map[string]*specs.MachineStatusSpec_Schematic{ + "m1": nil, + }, + }, + { + name: "invalid schematic", + machines: map[string]*specs.MachineStatusSpec_Schematic{ + "m1": {Invalid: true}, + }, + expectErr: true, + }, + { + name: "mix of valid and invalid", + machines: map[string]*specs.MachineStatusSpec_Schematic{ + "m1": {Id: "abc", FullId: "abc123"}, + "m2": {Invalid: true}, + }, + expectErr: true, + }, + { + name: "agent mode is not counted", + machines: map[string]*specs.MachineStatusSpec_Schematic{ + "m1": {InAgentMode: true, Invalid: true}, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) + t.Cleanup(cancel) + + st := state.WrapCore(namespaced.NewState(inmem.Build)) + + for id, schematic := range tt.machines { + ms := omni.NewMachineStatus(id) + ms.TypedSpec().Value.Schematic = schematic + + require.NoError(t, st.Create(ctx, ms)) + } + + err := validations.EnsureNoNonImageFactoryMachines(ctx, st) + if tt.expectErr { + assert.ErrorContains(t, err, "provisioned without ImageFactory") + } else { + assert.NoError(t, err) + } + }) + } +}