feat: add Talos version end-of-support notifications and metrics

* Track machines running Talos versions approaching or past end of support relative to MinTalosVersion.
* Replace the config-driven non-ImageFactory deprecation notification with hardcoded constants and add two new notifications (approaching end of support, end of support reached) with corresponding Prometheus metrics.
* Add startup validation hooks (currently disabled) that will cause Omni to refuse to start when unsupported machines are detected.
* Fix frontend notification namespace from Default to Ephemeral.

Signed-off-by: Oguz Kilcan <oguz.kilcan@siderolabs.com>
This commit is contained in:
Oguz Kilcan 2026-04-16 14:47:02 +02:00
parent 302e9175a3
commit 475e3660d7
No known key found for this signature in database
GPG Key ID: 372F271E3AD80BFC
20 changed files with 544 additions and 240 deletions

View File

@ -6678,19 +6678,21 @@ func (x *MachineExtensionsStatusSpec) GetTalosVersion() string {
// MachineStatusMetricsSpec provides aggregated state of the number of registered and connected machines for the Omni instance.
type MachineStatusMetricsSpec struct {
state protoimpl.MessageState `protogen:"open.v1"`
RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"`
ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"`
AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"`
PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"`
Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"`
RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"`
InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
state protoimpl.MessageState `protogen:"open.v1"`
RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"`
ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"`
AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"`
PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"`
Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"`
RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"`
InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"`
ApproachingTalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,12,opt,name=approaching_talos_version_end_of_support_machines_count,json=approachingTalosVersionEndOfSupportMachinesCount,proto3" json:"approaching_talos_version_end_of_support_machines_count,omitempty"`
TalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,13,opt,name=talos_version_end_of_support_machines_count,json=talosVersionEndOfSupportMachinesCount,proto3" json:"talos_version_end_of_support_machines_count,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *MachineStatusMetricsSpec) Reset() {
@ -6793,6 +6795,20 @@ func (x *MachineStatusMetricsSpec) GetInvalidSchematicMachinesCount() uint32 {
return 0
}
// GetApproachingTalosVersionEndOfSupportMachinesCount returns the
// ApproachingTalosVersionEndOfSupportMachinesCount field, or 0 when the
// receiver is nil.
func (x *MachineStatusMetricsSpec) GetApproachingTalosVersionEndOfSupportMachinesCount() uint32 {
	if x == nil {
		return 0
	}

	return x.ApproachingTalosVersionEndOfSupportMachinesCount
}
// GetTalosVersionEndOfSupportMachinesCount returns the
// TalosVersionEndOfSupportMachinesCount field, or 0 when the receiver is nil.
func (x *MachineStatusMetricsSpec) GetTalosVersionEndOfSupportMachinesCount() uint32 {
	if x == nil {
		return 0
	}

	return x.TalosVersionEndOfSupportMachinesCount
}
// ClusterMetricsSpec contains metrics about the clusters in the Omni instance.
type ClusterMetricsSpec struct {
state protoimpl.MessageState `protogen:"open.v1"`
@ -11665,7 +11681,7 @@ const file_omni_specs_omni_proto_rawDesc = "" +
"\tInstalled\x10\x00\x12\x0e\n" +
"\n" +
"Installing\x10\x01\x12\f\n" +
"\bRemoving\x10\x02\"\x8c\a\n" +
"\bRemoving\x10\x02\"\xdb\b\n" +
"\x18MachineStatusMetricsSpec\x12:\n" +
"\x19registered_machines_count\x18\x01 \x01(\rR\x17registeredMachinesCount\x128\n" +
"\x18connected_machines_count\x18\x02 \x01(\rR\x16connectedMachinesCount\x128\n" +
@ -11678,7 +11694,9 @@ const file_omni_specs_omni_proto_rawDesc = "" +
"\x19registered_machines_limit\x18\t \x01(\rR\x17registeredMachinesLimit\x12<\n" +
"\x1aregistration_limit_reached\x18\n" +
" \x01(\bR\x18registrationLimitReached\x12G\n" +
" invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x1a<\n" +
" invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x12q\n" +
"7approaching_talos_version_end_of_support_machines_count\x18\f \x01(\rR0approachingTalosVersionEndOfSupportMachinesCount\x12Z\n" +
"+talos_version_end_of_support_machines_count\x18\r \x01(\rR%talosVersionEndOfSupportMachinesCount\x1a<\n" +
"\x0ePlatformsEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\x1aC\n" +

View File

@ -1356,6 +1356,8 @@ message MachineStatusMetricsSpec {
uint32 registered_machines_limit = 9;
bool registration_limit_reached = 10;
uint32 invalid_schematic_machines_count = 11;
uint32 approaching_talos_version_end_of_support_machines_count = 12;
uint32 talos_version_end_of_support_machines_count = 13;
}
// ClusterMetricsSpec contains metrics about the clusters in the Omni instance.

View File

@ -2450,6 +2450,8 @@ func (m *MachineStatusMetricsSpec) CloneVT() *MachineStatusMetricsSpec {
r.RegisteredMachinesLimit = m.RegisteredMachinesLimit
r.RegistrationLimitReached = m.RegistrationLimitReached
r.InvalidSchematicMachinesCount = m.InvalidSchematicMachinesCount
r.ApproachingTalosVersionEndOfSupportMachinesCount = m.ApproachingTalosVersionEndOfSupportMachinesCount
r.TalosVersionEndOfSupportMachinesCount = m.TalosVersionEndOfSupportMachinesCount
if rhs := m.Platforms; rhs != nil {
tmpContainer := make(map[string]uint32, len(rhs))
for k, v := range rhs {
@ -6626,6 +6628,12 @@ func (this *MachineStatusMetricsSpec) EqualVT(that *MachineStatusMetricsSpec) bo
if this.InvalidSchematicMachinesCount != that.InvalidSchematicMachinesCount {
return false
}
if this.ApproachingTalosVersionEndOfSupportMachinesCount != that.ApproachingTalosVersionEndOfSupportMachinesCount {
return false
}
if this.TalosVersionEndOfSupportMachinesCount != that.TalosVersionEndOfSupportMachinesCount {
return false
}
return string(this.unknownFields) == string(that.unknownFields)
}
@ -14280,6 +14288,16 @@ func (m *MachineStatusMetricsSpec) MarshalToSizedBufferVT(dAtA []byte) (int, err
i -= len(m.unknownFields)
copy(dAtA[i:], m.unknownFields)
}
if m.TalosVersionEndOfSupportMachinesCount != 0 {
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.TalosVersionEndOfSupportMachinesCount))
i--
dAtA[i] = 0x68
}
if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 {
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount))
i--
dAtA[i] = 0x60
}
if m.InvalidSchematicMachinesCount != 0 {
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.InvalidSchematicMachinesCount))
i--
@ -18939,6 +18957,12 @@ func (m *MachineStatusMetricsSpec) SizeVT() (n int) {
if m.InvalidSchematicMachinesCount != 0 {
n += 1 + protohelpers.SizeOfVarint(uint64(m.InvalidSchematicMachinesCount))
}
if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 {
n += 1 + protohelpers.SizeOfVarint(uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount))
}
if m.TalosVersionEndOfSupportMachinesCount != 0 {
n += 1 + protohelpers.SizeOfVarint(uint64(m.TalosVersionEndOfSupportMachinesCount))
}
n += len(m.unknownFields)
return n
}
@ -36756,6 +36780,44 @@ func (m *MachineStatusMetricsSpec) UnmarshalVT(dAtA []byte) error {
break
}
}
case 12:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field ApproachingTalosVersionEndOfSupportMachinesCount", wireType)
}
m.ApproachingTalosVersionEndOfSupportMachinesCount = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return protohelpers.ErrIntOverflow
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.ApproachingTalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift
if b < 0x80 {
break
}
}
case 13:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field TalosVersionEndOfSupportMachinesCount", wireType)
}
m.TalosVersionEndOfSupportMachinesCount = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return protohelpers.ErrIntOverflow
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.TalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift
if b < 0x80 {
break
}
}
default:
iNdEx = preIndex
skippy, err := protohelpers.Skip(dAtA[iNdEx:])

View File

@ -34,6 +34,14 @@ const (
// NotificationNonImageFactoryMachinesID is the ID for the non-ImageFactory machines deprecation notification.
// tsgen:NotificationNonImageFactoryMachinesID
NotificationNonImageFactoryMachinesID = "non-image-factory-machines"
// NotificationApproachingTalosVersionEndOfSupportID is the ID for the notification shown when machines are approaching Talos version end of support.
// tsgen:NotificationApproachingTalosVersionEndOfSupportID
NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support"
// NotificationTalosVersionEndOfSupportID is the ID for the notification shown when machines have reached Talos version end of support.
// tsgen:NotificationTalosVersionEndOfSupportID
NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support"
)
// Notification describes a generic notification emitted by a controller.

View File

@ -164,7 +164,6 @@ func buildRootCommand() (*cobra.Command, error) {
defineStorageFlags(rootCmd, rootCmdFlagBinder, flagConfig)
defineRegistriesFlags(rootCmdFlagBinder, flagConfig)
defineFeatureFlags(rootCmdFlagBinder, flagConfig)
defineNotificationFlags(rootCmdFlagBinder, flagConfig)
defineDebugFlags(rootCmdFlagBinder, flagConfig)
defineEtcdBackupsFlags(rootCmd, rootCmdFlagBinder, flagConfig)
defineEulaFlags(rootCmd, rootCmdFlagBinder, flagConfig)
@ -353,12 +352,6 @@ func defineFeatureFlags(b *FlagBinder, flagConfig *config.Params) {
b.BoolVar("features.enableClusterImport", &flagConfig.Features.EnableClusterImport)
}
// defineNotificationFlags hands the binder the flag names for the
// non-ImageFactory deprecation notification together with the addresses of the
// matching fields under flagConfig.Notifications.NonImageFactoryDeprecation:
// one boolean flag (enabled) and two string flags (title, body).
func defineNotificationFlags(b *FlagBinder, flagConfig *config.Params) {
b.BoolVar("notifications.nonImageFactoryDeprecation.enabled", &flagConfig.Notifications.NonImageFactoryDeprecation.Enabled)
b.StringVar("notifications.nonImageFactoryDeprecation.title", &flagConfig.Notifications.NonImageFactoryDeprecation.Title)
b.StringVar("notifications.nonImageFactoryDeprecation.body", &flagConfig.Notifications.NonImageFactoryDeprecation.Body)
}
func defineDebugFlags(b *FlagBinder, flagConfig *config.Params) {
b.StringVar("debug.pprof.endpoint", &flagConfig.Debug.Pprof.Endpoint)
b.StringVar("debug.server.endpoint", &flagConfig.Debug.Server.Endpoint)

View File

@ -928,6 +928,8 @@ export type MachineStatusMetricsSpec = {
registered_machines_limit?: number
registration_limit_reached?: boolean
invalid_schematic_machines_count?: number
approaching_talos_version_end_of_support_machines_count?: number
talos_version_end_of_support_machines_count?: number
}
export type ClusterMetricsSpec = {

View File

@ -214,6 +214,8 @@ export const NodeForceDestroyRequestType = "NodeForceDestroyRequests.omni.sidero
export const NotificationType = "Notifications.omni.sidero.dev";
export const NotificationMachineRegistrationLimitID = "machine-registration-limit";
export const NotificationNonImageFactoryMachinesID = "non-image-factory-machines";
export const NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support";
export const NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support";
export const OngoingTaskType = "OngoingTasks.omni.sidero.dev";
export const RedactedClusterMachineConfigType = "RedactedClusterMachineConfigs.omni.sidero.dev";
export const RotateKubernetesCAType = "RotateKubernetesCAs.omni.sidero.dev";

View File

@ -7,7 +7,7 @@ import { createWatchStreamHandler } from '@msw/helpers'
import type { Meta, StoryObj } from '@storybook/vue3-vite'
import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb'
import { DefaultNamespace, NotificationType } from '@/api/resources'
import { EphemeralNamespace, NotificationType } from '@/api/resources'
import THeader from './THeader.vue'
@ -24,13 +24,13 @@ export const Default: Story = {
handlers: [
createWatchStreamHandler<NotificationSpec>({
expectedOptions: {
namespace: DefaultNamespace,
namespace: EphemeralNamespace,
type: NotificationType,
},
initialResources: faker.helpers.multiple(
() => ({
metadata: {
namespace: DefaultNamespace,
namespace: EphemeralNamespace,
type: NotificationType,
id: faker.string.uuid(),
},

View File

@ -10,7 +10,7 @@ import { computed, ref } from 'vue'
import { Runtime } from '@/api/common/omni.pb'
import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb'
import { DefaultNamespace, NotificationType } from '@/api/resources'
import { EphemeralNamespace, NotificationType } from '@/api/resources'
import IconButton from '@/components/Button/IconButton.vue'
import TButton from '@/components/Button/TButton.vue'
import TIcon, { type IconType } from '@/components/Icon/TIcon.vue'
@ -28,7 +28,7 @@ defineEmits<{ toggleSidebar: [] }>()
const { data } = useResourceWatch<NotificationSpec>({
runtime: Runtime.Omni,
resource: {
namespace: DefaultNamespace,
namespace: EphemeralNamespace,
type: NotificationType,
},
})

View File

@ -10,9 +10,11 @@ import (
"fmt"
"iter"
"strconv"
"strings"
"sync"
"time"
"github.com/blang/semver/v4"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
@ -22,6 +24,7 @@ import (
"go.uber.org/zap"
"github.com/siderolabs/omni/client/api/omni/specs"
"github.com/siderolabs/omni/client/pkg/constants"
"github.com/siderolabs/omni/client/pkg/omni/resources"
"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
@ -34,18 +37,34 @@ type nodeInfo struct {
connected bool
}
// NonImageFactoryDeprecationConfig contains configuration for the non-ImageFactory deprecation notification.
type NonImageFactoryDeprecationConfig struct {
Title string
Body string
Enabled bool
}
// Feature switches for the notifications reconciled by MachineStatusMetricsController.
const (
	// NonImageFactoryNotificationEnabled gates the notification about machines provisioned without ImageFactory.
	// It is currently off and will be flipped to true in a future release.
	NonImageFactoryNotificationEnabled = false

	// UnsupportedTalosVersionNotificationEnabled gates the notifications about machines whose Talos version
	// is approaching, or already past, end of support.
	UnsupportedTalosVersionNotificationEnabled = true
)

// talosVersionSupportPolicyDocsURL is appended to the Talos end-of-support notification bodies.
const talosVersionSupportPolicyDocsURL = "https://docs.siderolabs.com/omni/getting-started/talos-version-support-policy"

// Non-ImageFactory notification text. The body is a format string taking the machine count (%d).
const (
	nonImageFactoryNotificationTitle = "Non-ImageFactory Machines Detected"
	nonImageFactoryNotificationBody  = "%d machine(s) were provisioned without ImageFactory." +
		" Omni will refuse to start with non-ImageFactory machines in a future release." +
		" Please re-provision them using ImageFactory."
)

// Talos end-of-support notification text. Each body is a format string taking the machine count (%d)
// followed by the minimum supported Talos version (%s).
const (
	approachingTalosEndOfSupportNotificationTitle = "Talos Version Approaching End of Support"
	approachingTalosEndOfSupportNotificationBody  = "%d machine(s) are running a Talos version that will lose Omni support soon." +
		" The minimum supported version is %s. Please upgrade: " + talosVersionSupportPolicyDocsURL

	talosVersionEndOfSupportNotificationTitle = "Unsupported Talos Version Detected"
	talosVersionEndOfSupportNotificationBody  = "%d machine(s) are running unsupported Talos versions (below %s)." +
		" Please upgrade immediately: " + talosVersionSupportPolicyDocsURL
)
// NewMachineStatusMetricsController creates a new MachineStatusMetricsController.
func NewMachineStatusMetricsController(maxRegisteredMachines uint32, nonImageFactoryDeprecation NonImageFactoryDeprecationConfig) *MachineStatusMetricsController {
func NewMachineStatusMetricsController(maxRegisteredMachines uint32) *MachineStatusMetricsController {
return &MachineStatusMetricsController{
maxRegisteredMachines: maxRegisteredMachines,
nonImageFactoryDeprecation: nonImageFactoryDeprecation,
maxRegisteredMachines: maxRegisteredMachines,
}
}
@ -58,19 +77,19 @@ type MachineStatusMetricsController struct {
metricsOnce sync.Once
nonImageFactoryDeprecation NonImageFactoryDeprecationConfig
maxRegisteredMachines uint32
platformNames []string
metricNumMachines prometheus.Gauge
metricNumConnectedMachines prometheus.Gauge
metricNumInvalidSchematicMachines prometheus.Gauge
metricNumMachinesPerVersion *prometheus.Desc
metricMachinePlatforms *prometheus.GaugeVec
metricMachineSecureBootStatus *prometheus.GaugeVec
metricMachineUKIStatus *prometheus.GaugeVec
metricNumMachines prometheus.Gauge
metricNumConnectedMachines prometheus.Gauge
metricNumInvalidSchematicMachines prometheus.Gauge
metricNumApproachingTalosVersionEndOfSupportMachines prometheus.Gauge
metricNumTalosVersionEndOfSupportMachines prometheus.Gauge
metricNumMachinesPerVersion *prometheus.Desc
metricMachinePlatforms *prometheus.GaugeVec
metricMachineSecureBootStatus *prometheus.GaugeVec
metricMachineUKIStatus *prometheus.GaugeVec
}
// Name implements controller.Controller interface.
@ -125,6 +144,16 @@ func (ctrl *MachineStatusMetricsController) initMetrics() {
Help: "Number of machines in the instance that were provisioned without using ImageFactory.",
})
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "omni_machines_approaching_talos_version_end_of_support",
Help: "Number of machines running a Talos version at or near the minimum supported version.",
})
ctrl.metricNumTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "omni_machines_talos_version_end_of_support",
Help: "Number of machines running a Talos version below the minimum supported version.",
})
ctrl.metricNumMachinesPerVersion = prometheus.NewDesc(
"omni_machines_version",
"Number of machines in the instance by version.",
@ -198,12 +227,22 @@ func (ctrl *MachineStatusMetricsController) Run(ctx context.Context, r controlle
}
}
if ctrl.nonImageFactoryDeprecation.Enabled {
if NonImageFactoryNotificationEnabled {
if err = ctrl.reconcileNonImageFactoryDeprecationNotification(ctx, r, metricsSpec); err != nil {
return err
}
}
if UnsupportedTalosVersionNotificationEnabled {
if err = ctrl.reconcileApproachingTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil {
return err
}
if err = ctrl.reconcileTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil {
return err
}
}
select {
case <-ctx.Done():
return nil
@ -237,8 +276,8 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN
if invalidSchematicCount > 0 {
return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationNonImageFactoryMachinesID),
func(res *omni.Notification) error {
res.TypedSpec().Value.Title = ctrl.nonImageFactoryDeprecation.Title
res.TypedSpec().Value.Body = fmt.Sprintf(ctrl.nonImageFactoryDeprecation.Body, invalidSchematicCount)
res.TypedSpec().Value.Title = nonImageFactoryNotificationTitle
res.TypedSpec().Value.Body = fmt.Sprintf(nonImageFactoryNotificationBody, invalidSchematicCount)
res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING
return nil
@ -251,6 +290,44 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN
return err
}
// reconcileApproachingTalosVersionEndOfSupportNotification keeps the
// "approaching end of support" notification in sync with metricsSpec: the
// notification is written while ApproachingTalosVersionEndOfSupportMachinesCount
// is non-zero and torn down otherwise.
//
// The count is read through the generated getter, so a nil metricsSpec is
// treated as a zero count instead of causing a nil dereference.
func (ctrl *MachineStatusMetricsController) reconcileApproachingTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error {
	count := metricsSpec.GetApproachingTalosVersionEndOfSupportMachinesCount()

	return ctrl.reconcileMachineCountNotification(ctx, r,
		omni.NotificationApproachingTalosVersionEndOfSupportID,
		count,
		approachingTalosEndOfSupportNotificationTitle,
		fmt.Sprintf(approachingTalosEndOfSupportNotificationBody, count, constants.MinTalosVersion),
	)
}

// reconcileTalosVersionEndOfSupportNotification keeps the "end of support
// reached" notification in sync with metricsSpec: the notification is written
// while TalosVersionEndOfSupportMachinesCount is non-zero and torn down
// otherwise.
//
// The count is read through the generated getter, so a nil metricsSpec is
// treated as a zero count instead of causing a nil dereference.
func (ctrl *MachineStatusMetricsController) reconcileTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error {
	count := metricsSpec.GetTalosVersionEndOfSupportMachinesCount()

	return ctrl.reconcileMachineCountNotification(ctx, r,
		omni.NotificationTalosVersionEndOfSupportID,
		count,
		talosVersionEndOfSupportNotificationTitle,
		fmt.Sprintf(talosVersionEndOfSupportNotificationBody, count, constants.MinTalosVersion),
	)
}

// reconcileMachineCountNotification is the shared implementation behind the
// Talos end-of-support notifications.
//
// When count is zero the notification with the given id is torn down and
// destroyed; otherwise it is created or updated as a WARNING carrying the
// given title and the already-formatted body. Errors from the underlying
// writer calls are returned unchanged.
func (ctrl *MachineStatusMetricsController) reconcileMachineCountNotification(ctx context.Context, r controller.Runtime, id string, count uint32, title, body string) error {
	if count == 0 {
		_, err := helpers.TeardownAndDestroy(ctx, r, omni.NewNotification(id).Metadata())

		return err
	}

	return safe.WriterModify(ctx, r, omni.NewNotification(id),
		func(res *omni.Notification) error {
			res.TypedSpec().Value.Title = title
			res.TypedSpec().Value.Body = body
			res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING

			return nil
		},
	)
}
func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omni.MachineStatus], numPendingMachines int) *specs.MachineStatusMetricsSpec {
platformMetrics := make(map[string]uint32, len(ctrl.platformNames))
for _, p := range ctrl.platformNames {
@ -270,7 +347,14 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
"false": 0,
}
var machines, connectedMachines, allocatedMachines, invalidSchematicMachines int
minTalosVer := semver.MustParse(constants.MinTalosVersion)
// Machines at MinTalosVersion or 1 minor above are "approaching talos end of support"
approachingThreshold := semver.Version{Major: minTalosVer.Major, Minor: minTalosVer.Minor + 2}
var (
machines, connectedMachines, allocatedMachines, invalidSchematicMachines int
approachingEndOfSupportMachines, endOfSupportTalosVersionMachines int
)
ctrl.versionsMu.Lock()
ctrl.versionsMap = map[nodeInfo]int32{}
@ -282,12 +366,25 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
connectedMachines++
}
if ms.TypedSpec().Value.TalosVersion != "" {
talosVersion := ms.TypedSpec().Value.TalosVersion
if talosVersion != "" {
ctrl.versionsMap[nodeInfo{
talosVersion: ms.TypedSpec().Value.TalosVersion,
talosVersion: talosVersion,
cluster: ms.TypedSpec().Value.Cluster,
connected: ms.TypedSpec().Value.Connected,
}]++
if ver, err := semver.ParseTolerant(strings.TrimLeft(talosVersion, "v")); err == nil {
ver.Pre = nil
switch {
case ver.LT(minTalosVer):
endOfSupportTalosVersionMachines++
case ver.LT(approachingThreshold):
approachingEndOfSupportMachines++
}
}
}
if ms.TypedSpec().Value.Cluster != "" {
@ -318,6 +415,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
ctrl.metricNumMachines.Set(float64(machines))
ctrl.metricNumConnectedMachines.Set(float64(connectedMachines))
ctrl.metricNumInvalidSchematicMachines.Set(float64(invalidSchematicMachines))
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Set(float64(approachingEndOfSupportMachines))
ctrl.metricNumTalosVersionEndOfSupportMachines.Set(float64(endOfSupportTalosVersionMachines))
for key, num := range platformMetrics {
ctrl.metricMachinePlatforms.WithLabelValues(key).Set(float64(num))
@ -342,6 +441,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
RegisteredMachinesLimit: ctrl.maxRegisteredMachines,
RegistrationLimitReached: ctrl.maxRegisteredMachines > 0 && uint32(machines) >= ctrl.maxRegisteredMachines,
InvalidSchematicMachinesCount: uint32(invalidSchematicMachines),
ApproachingTalosVersionEndOfSupportMachinesCount: uint32(approachingEndOfSupportMachines),
TalosVersionEndOfSupportMachinesCount: uint32(endOfSupportTalosVersionMachines),
}
}
@ -365,6 +466,8 @@ func (ctrl *MachineStatusMetricsController) Collect(ch chan<- prometheus.Metric)
ctrl.metricNumMachines.Collect(ch)
ctrl.metricNumConnectedMachines.Collect(ch)
ctrl.metricNumInvalidSchematicMachines.Collect(ch)
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Collect(ch)
ctrl.metricNumTalosVersionEndOfSupportMachines.Collect(ch)
ctrl.metricMachinePlatforms.Collect(ch)
ctrl.metricMachineSecureBootStatus.Collect(ch)
ctrl.metricMachineUKIStatus.Collect(ch)

View File

@ -22,14 +22,6 @@ import (
"github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/testutils"
)
// newNonImageFactoryDeprecationConfig builds the deprecation notification
// config used by the tests: fixed title and body text, with the Enabled switch
// taken from the argument.
func newNonImageFactoryDeprecationConfig(enabled bool) omnictrl.NonImageFactoryDeprecationConfig {
	cfg := omnictrl.NonImageFactoryDeprecationConfig{
		Title: "Non-ImageFactory Machines Detected",
		Body:  "%d machine(s) were provisioned without ImageFactory.",
	}
	cfg.Enabled = enabled

	return cfg
}
func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
t.Parallel()
@ -82,7 +74,7 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
func(_ context.Context, tc testutils.TestContext) {
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered, omnictrl.NonImageFactoryDeprecationConfig{})))
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered)))
},
func(ctx context.Context, tc testutils.TestContext) {
for _, id := range tt.machineIDs {
@ -113,43 +105,42 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
}
}
func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T) {
func TestMachineStatusMetricsController_UnsupportedTalosVersion(t *testing.T) {
t.Parallel()
require.True(t, omnictrl.UnsupportedTalosVersionNotificationEnabled, "this test assumes UnsupportedTalosVersionNotificationEnabled is true")
for _, tt := range []struct {
name string
invalidSchematicIDs []string
validSchematicIDs []string
enabled bool
expectNotification bool
expectCount int
machineVersions map[string]string
name string
expectApproachingCount int
expectEndOfSupportCount int
expectApproaching bool
expectEndOfSupport bool
}{
{
name: "disabled, invalid machines present",
invalidSchematicIDs: []string{"m1"},
enabled: false,
expectNotification: false,
name: "all machines above threshold",
machineVersions: map[string]string{"m1": "v1.10.0", "m2": "v1.11.0"},
},
{
name: "enabled, no invalid machines",
validSchematicIDs: []string{"m1", "m2"},
enabled: true,
expectNotification: false,
name: "machines approaching end of support",
machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.9.0", "m3": "v1.11.0"},
expectApproaching: true,
expectApproachingCount: 2,
},
{
name: "enabled, some invalid machines",
invalidSchematicIDs: []string{"m1", "m2"},
validSchematicIDs: []string{"m3"},
enabled: true,
expectNotification: true,
expectCount: 2,
name: "machines past end of support",
machineVersions: map[string]string{"m1": "v1.7.0", "m2": "v1.6.0", "m3": "v1.11.0"},
expectEndOfSupport: true,
expectEndOfSupportCount: 2,
},
{
name: "enabled, all invalid machines",
invalidSchematicIDs: []string{"m1", "m2", "m3"},
enabled: true,
expectNotification: true,
expectCount: 3,
name: "mix of approaching and past end of support",
machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.7.0", "m3": "v1.11.0"},
expectApproaching: true,
expectApproachingCount: 1,
expectEndOfSupport: true,
expectEndOfSupportCount: 1,
},
} {
t.Run(tt.name, func(t *testing.T) {
@ -160,40 +151,35 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T)
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
func(_ context.Context, tc testutils.TestContext) {
require.NoError(t, tc.Runtime.RegisterController(
omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(tt.enabled)),
))
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0)))
},
func(ctx context.Context, tc testutils.TestContext) {
for _, id := range tt.invalidSchematicIDs {
for id, version := range tt.machineVersions {
ms := omni.NewMachineStatus(id)
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Invalid: true,
}
ms.TypedSpec().Value.TalosVersion = version
require.NoError(t, tc.State.Create(ctx, ms))
}
for _, id := range tt.validSchematicIDs {
ms := omni.NewMachineStatus(id)
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: "valid-id",
FullId: "valid-full-id",
}
require.NoError(t, tc.State.Create(ctx, ms))
}
if tt.expectNotification {
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) {
a.Equal("Non-ImageFactory Machines Detected", res.TypedSpec().Value.Title)
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectCount))
if tt.expectApproaching {
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectApproachingCount))
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
})
} else {
// Notification should not exist. Sleep briefly since there is no state change to poll on.
time.Sleep(500 * time.Millisecond)
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID)
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID)
}
if tt.expectEndOfSupport {
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectEndOfSupportCount))
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
})
} else {
time.Sleep(500 * time.Millisecond)
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID)
}
},
)
@ -201,7 +187,7 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T)
}
}
func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *testing.T) {
func TestMachineStatusMetricsController_UnsupportedTalosVersionTeardown(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second)
@ -209,35 +195,27 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *te
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
func(_ context.Context, tc testutils.TestContext) {
require.NoError(t, tc.Runtime.RegisterController(
omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(true)),
))
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0)))
},
func(ctx context.Context, tc testutils.TestContext) {
// Create a machine with invalid schematic.
ms := omni.NewMachineStatus("m1")
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{Invalid: true}
ms.TypedSpec().Value.TalosVersion = "v1.7.0"
require.NoError(t, tc.State.Create(ctx, ms))
// Wait for the notification to appear.
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) {
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
})
// Fix the machine schematic (no longer invalid).
_, err := safe.StateUpdateWithConflicts(ctx, tc.State, ms.Metadata(), func(res *omni.MachineStatus) error {
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: "valid-id",
FullId: "valid-full-id",
}
res.TypedSpec().Value.TalosVersion = "v1.11.0"
return nil
})
require.NoError(t, err)
// Notification should be torn down.
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID)
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID)
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID)
},
)
}

View File

@ -172,11 +172,7 @@ func NewRuntime(cfg *config.Params, talosClientFactory *talos.ClientFactory, dns
LBConfig: cfg.Services.LoadBalancer,
},
omnictrl.NewMachineCleanupController(),
omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines(), omnictrl.NonImageFactoryDeprecationConfig{
Enabled: cfg.Notifications.NonImageFactoryDeprecation.GetEnabled(),
Title: cfg.Notifications.NonImageFactoryDeprecation.GetTitle(),
Body: cfg.Notifications.NonImageFactoryDeprecation.GetBody(),
}),
omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines()),
omnictrl.NewVersionsController(cfg.Registries.GetImageFactoryBaseURL(), cfg.Features.GetEnableTalosPreReleaseVersions(), cfg.Registries.GetKubernetes()),
omnictrl.NewClusterLoadBalancerController(
cfg.Services.LoadBalancer.GetMinPort(),

View File

@ -919,39 +919,6 @@ func (s *LogsStripe) SetMinCommit(v uint32) {
s.MinCommit = &v
}
func (s *NonImageFactoryDeprecation) GetBody() string {
if s == nil || s.Body == nil {
return *new(string)
}
return *s.Body
}
func (s *NonImageFactoryDeprecation) SetBody(v string) {
s.Body = &v
}
func (s *NonImageFactoryDeprecation) GetEnabled() bool {
if s == nil || s.Enabled == nil {
return *new(bool)
}
return *s.Enabled
}
func (s *NonImageFactoryDeprecation) SetEnabled(v bool) {
s.Enabled = &v
}
func (s *NonImageFactoryDeprecation) GetTitle() string {
if s == nil || s.Title == nil {
return *new(string)
}
return *s.Title
}
func (s *NonImageFactoryDeprecation) SetTitle(v string) {
s.Title = &v
}
func (s *OIDC) GetAllowUnverifiedEmail() bool {
if s == nil || s.AllowUnverifiedEmail == nil {
return *new(bool)

View File

@ -30,7 +30,17 @@ import (
"github.com/siderolabs/omni/internal/pkg/jsonschema"
)
const wireguardDefaultPort = "50180"
const (
wireguardDefaultPort = "50180"
// UnsupportedTalosVersionFailOnStart controls whether Omni refuses to start when machines
// are running Talos versions below MinTalosVersion. This will be flipped to true in a future release.
// While it is false, ValidateState does not call validations.EnsureNoMachinesBelowMinTalosVersion at all.
UnsupportedTalosVersionFailOnStart = false
// NonImageFactoryFailOnStart controls whether Omni refuses to start when machines
// were provisioned without ImageFactory. This will be flipped to true in a future release.
// While it is false, ValidateState does not call validations.EnsureNoNonImageFactoryMachines at all.
NonImageFactoryFailOnStart = false
)
//go:embed schema.json
var schemaData string
@ -164,6 +174,18 @@ func (p *Params) ValidateState(ctx context.Context, st state.State) error {
}
}
if UnsupportedTalosVersionFailOnStart {
if err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st); err != nil {
return err
}
}
if NonImageFactoryFailOnStart {
if err := validations.EnsureNoNonImageFactoryMachines(ctx, st); err != nil {
return err
}
}
return nil
}

View File

@ -564,10 +564,4 @@ func TestSchemaDefaults(t *testing.T) {
// features
assert.True(t, p.Features.GetEnableConfigDataCompression())
assert.True(t, p.Features.GetEnableClusterImport())
// notifications.nonImageFactoryDeprecation
assert.False(t, p.Notifications.NonImageFactoryDeprecation.GetEnabled())
assert.Equal(t, "Non-ImageFactory Machines Detected", p.Notifications.NonImageFactoryDeprecation.GetTitle())
assert.Equal(t, "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.",
p.Notifications.NonImageFactoryDeprecation.GetBody())
}

View File

@ -13,7 +13,6 @@
"registries",
"debug",
"features",
"notifications",
"eulaAccept"
],
"goJSONSchema": {
@ -58,10 +57,6 @@
"description": "Features contains feature flags to enable/disable various Omni features.",
"$ref": "#/definitions/Features"
},
"notifications": {
"description": "Notifications contains configuration for system notifications emitted by controllers.",
"$ref": "#/definitions/Notifications"
},
"eulaAccept": {
"description": "EulaAccept contains the identity of the person accepting the EULA via CLI flags or config.",
"$ref": "#/definitions/EulaAccept"
@ -1729,50 +1724,6 @@
}
}
},
"Notifications": {
"type": "object",
"required": [
"nonImageFactoryDeprecation"
],
"properties": {
"nonImageFactoryDeprecation": {
"description": "NonImageFactoryDeprecation contains configuration for the notification shown when machines are provisioned without using ImageFactory.",
"$ref": "#/definitions/NonImageFactoryDeprecation"
}
}
},
"NonImageFactoryDeprecation": {
"type": "object",
"properties": {
"enabled": {
"description": "Enabled controls whether the non-ImageFactory deprecation notification is shown when machines with invalid schematics are detected.",
"x-cli-flag": "non-image-factory-deprecation-enabled",
"type": "boolean",
"default": false,
"goJSONSchema": {
"pointer": true
}
},
"title": {
"description": "Title is the title of the non-ImageFactory deprecation notification.",
"x-cli-flag": "non-image-factory-deprecation-title",
"type": "string",
"default": "Non-ImageFactory Machines Detected",
"goJSONSchema": {
"pointer": true
}
},
"body": {
"description": "Body is the body of the non-ImageFactory deprecation notification. Use %d as a placeholder for the number of affected machines.",
"x-cli-flag": "non-image-factory-deprecation-body",
"type": "string",
"default": "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.",
"goJSONSchema": {
"pointer": true
}
}
}
},
"EulaAccept": {
"type": "object",
"dependentRequired": {

View File

@ -434,25 +434,6 @@ type LogsStripe struct {
MinCommit *uint32 `json:"minCommit,omitempty,omitzero" yaml:"minCommit,omitempty"`
}
type NonImageFactoryDeprecation struct {
// Body is the body of the non-ImageFactory deprecation notification. Use %d as a
// placeholder for the number of affected machines.
Body *string `json:"body,omitempty,omitzero" yaml:"body,omitempty"`
// Enabled controls whether the non-ImageFactory deprecation notification is shown
// when machines with invalid schematics are detected.
Enabled *bool `json:"enabled,omitempty,omitzero" yaml:"enabled,omitempty"`
// Title is the title of the non-ImageFactory deprecation notification.
Title *string `json:"title,omitempty,omitzero" yaml:"title,omitempty"`
}
type Notifications struct {
// NonImageFactoryDeprecation contains configuration for the notification shown
// when machines are provisioned without using ImageFactory.
NonImageFactoryDeprecation NonImageFactoryDeprecation `json:"nonImageFactoryDeprecation" yaml:"nonImageFactoryDeprecation"`
}
type OIDC struct {
// AllowUnverifiedEmail controls whether users with unverified emails (without
// email_verified claim) are allowed to authenticate.
@ -501,10 +482,6 @@ type Params struct {
// Logs contains logging-related configuration.
Logs Logs `json:"logs" yaml:"logs"`
// Notifications contains configuration for system notifications emitted by
// controllers.
Notifications Notifications `json:"notifications" yaml:"notifications"`
// Registries contains container image registries configuration.
Registries Registries `json:"registries" yaml:"registries"`

View File

@ -0,0 +1,41 @@
// Copyright (c) 2026 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
package validations
import (
"context"
"fmt"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/gertd/go-pluralize"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
)
// EnsureNoNonImageFactoryMachines checks that no machines have an invalid schematic (provisioned without ImageFactory).
//
// It lists every MachineStatus in st and counts those for which SchematicReady() reports true
// and whose schematic is flagged as invalid. It returns nil when the count is zero, and an error
// naming the number of affected machines otherwise.
//
// A failure to list the machine statuses is returned wrapped with context; the underlying error
// remains reachable through errors.Is / errors.As.
func EnsureNoNonImageFactoryMachines(ctx context.Context, st state.State) error {
	statuses, err := safe.ReaderListAll[*omni.MachineStatus](ctx, st)
	if err != nil {
		// Add context so a startup failure says which step broke instead of surfacing a bare state error.
		return fmt.Errorf("listing machine statuses: %w", err)
	}

	var count int

	for status := range statuses.All() {
		spec := status.TypedSpec().Value

		// Both conditions must hold: the schematic is ready and it is marked invalid.
		if spec.SchematicReady() && spec.GetSchematic().GetInvalid() {
			count++
		}
	}

	if count == 0 {
		return nil
	}

	return fmt.Errorf("detected %s provisioned without ImageFactory; "+
		"please re-provision them using ImageFactory",
		pluralize.NewClient().Pluralize("machine", count, true))
}

View File

@ -0,0 +1,35 @@
// Copyright (c) 2026 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
package validations
import (
"context"
"fmt"
"github.com/blang/semver/v4"
"github.com/cosi-project/runtime/pkg/state"
"github.com/gertd/go-pluralize"
"github.com/siderolabs/omni/client/pkg/constants"
)
// EnsureNoMachinesBelowMinTalosVersion verifies that the state holds no machines running a Talos
// version older than constants.MinTalosVersion.
//
// The counting is delegated to getMachinesBelowTalosVersion; any error it reports is returned as is.
// When the reported count is zero the function returns nil, otherwise it returns an error stating
// how many machines are affected and which minimum version applies.
func EnsureNoMachinesBelowMinTalosVersion(ctx context.Context, st state.State) error {
	belowMin, err := getMachinesBelowTalosVersion(ctx, st, semver.MustParse(constants.MinTalosVersion))

	switch {
	case err != nil:
		return err
	case belowMin == 0:
		return nil
	}

	// Render the count together with the correctly pluralized noun, e.g. "2 machines".
	affected := pluralize.NewClient().Pluralize("machine", int(belowMin), true)

	return fmt.Errorf("detected %s running unsupported Talos versions (below %s); "+
		"please upgrade the machines",
		affected, constants.MinTalosVersion)
}

View File

@ -0,0 +1,153 @@
// Copyright (c) 2026 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
package validations_test
import (
"context"
"testing"
"time"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
"github.com/cosi-project/runtime/pkg/state/impl/namespaced"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/siderolabs/omni/client/api/omni/specs"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
"github.com/siderolabs/omni/internal/pkg/config/validations"
)
// TestEnsureNoMachinesBelowMinTalosVersion seeds an in-memory state with MachineStatus resources
// carrying the given Talos versions and checks whether the validation reports an error.
func TestEnsureNoMachinesBelowMinTalosVersion(t *testing.T) {
	t.Parallel()

	// testCase maps machine IDs to the Talos version stored on their MachineStatus.
	type testCase struct {
		versions  map[string]string
		name      string
		expectErr bool
	}

	cases := []testCase{
		{name: "no machines", versions: nil},
		{name: "machine below MinTalosVersion", versions: map[string]string{"m1": "v1.7.0"}, expectErr: true},
		{name: "machine at MinTalosVersion", versions: map[string]string{"m1": "v1.8.0"}},
		{name: "machine above MinTalosVersion", versions: map[string]string{"m1": "v1.10.0"}},
		{name: "mix of below and above", versions: map[string]string{"m1": "v1.7.0", "m2": "v1.10.0"}, expectErr: true},
		{name: "machine with empty version is ignored", versions: map[string]string{"m1": ""}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
			t.Cleanup(cancel)

			st := state.WrapCore(namespaced.NewState(inmem.Build))

			// Seed one MachineStatus per entry before running the validation.
			for machineID, talosVersion := range tc.versions {
				status := omni.NewMachineStatus(machineID)
				status.TypedSpec().Value.TalosVersion = talosVersion

				require.NoError(t, st.Create(ctx, status))
			}

			err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st)

			if !tc.expectErr {
				assert.NoError(t, err)

				return
			}

			assert.ErrorContains(t, err, "running unsupported Talos versions")
		})
	}
}
// TestEnsureNoNonImageFactoryMachines seeds an in-memory state with MachineStatus resources
// carrying the given schematics and checks whether the validation reports an error.
func TestEnsureNoNonImageFactoryMachines(t *testing.T) {
	t.Parallel()

	// schematics maps machine IDs to the schematic stored on their MachineStatus.
	type schematics = map[string]*specs.MachineStatusSpec_Schematic

	cases := []struct {
		machines  schematics
		name      string
		expectErr bool
	}{
		{name: "no machines", machines: nil},
		{
			name:     "valid schematic",
			machines: schematics{"m1": {Id: "abc", FullId: "abc123"}},
		},
		{
			name:     "no schematic (not ready)",
			machines: schematics{"m1": nil},
		},
		{
			name:      "invalid schematic",
			machines:  schematics{"m1": {Invalid: true}},
			expectErr: true,
		},
		{
			name: "mix of valid and invalid",
			machines: schematics{
				"m1": {Id: "abc", FullId: "abc123"},
				"m2": {Invalid: true},
			},
			expectErr: true,
		},
		{
			name:     "agent mode is not counted",
			machines: schematics{"m1": {InAgentMode: true, Invalid: true}},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
			t.Cleanup(cancel)

			st := state.WrapCore(namespaced.NewState(inmem.Build))

			// Seed one MachineStatus per entry before running the validation.
			for machineID, schematic := range tc.machines {
				status := omni.NewMachineStatus(machineID)
				status.TypedSpec().Value.Schematic = schematic

				require.NoError(t, st.Create(ctx, status))
			}

			err := validations.EnsureNoNonImageFactoryMachines(ctx, st)

			if !tc.expectErr {
				assert.NoError(t, err)

				return
			}

			assert.ErrorContains(t, err, "provisioned without ImageFactory")
		})
	}
}