mirror of
https://github.com/siderolabs/omni.git
synced 2026-05-04 22:26:13 +02:00
feat: add Talos version end-of-support notifications and metrics
* Track machines running Talos versions approaching or past end of support relative to MinTalosVersion. * Replace the config-driven non-ImageFactory deprecation notification with hardcoded constants and add two new notifications (approaching end of support, end of support reached) with corresponding Prometheus metrics. * Add startup validation hooks (currently disabled) that will refuse to start when unsupported machines are detected. * Fix frontend notification namespace from Default to Ephemeral. Signed-off-by: Oguz Kilcan <oguz.kilcan@siderolabs.com>
This commit is contained in:
parent
302e9175a3
commit
475e3660d7
@ -6678,19 +6678,21 @@ func (x *MachineExtensionsStatusSpec) GetTalosVersion() string {
|
||||
|
||||
// MachineStatusMetricsSpec provides aggregated state of the number of registered and connected machines for the Omni instance.
|
||||
type MachineStatusMetricsSpec struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"`
|
||||
ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"`
|
||||
AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"`
|
||||
PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"`
|
||||
Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"`
|
||||
RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"`
|
||||
InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
RegisteredMachinesCount uint32 `protobuf:"varint,1,opt,name=registered_machines_count,json=registeredMachinesCount,proto3" json:"registered_machines_count,omitempty"`
|
||||
ConnectedMachinesCount uint32 `protobuf:"varint,2,opt,name=connected_machines_count,json=connectedMachinesCount,proto3" json:"connected_machines_count,omitempty"`
|
||||
AllocatedMachinesCount uint32 `protobuf:"varint,3,opt,name=allocated_machines_count,json=allocatedMachinesCount,proto3" json:"allocated_machines_count,omitempty"`
|
||||
PendingMachinesCount uint32 `protobuf:"varint,4,opt,name=pending_machines_count,json=pendingMachinesCount,proto3" json:"pending_machines_count,omitempty"`
|
||||
Platforms map[string]uint32 `protobuf:"bytes,6,rep,name=platforms,proto3" json:"platforms,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
SecureBootStatus map[string]uint32 `protobuf:"bytes,7,rep,name=secure_boot_status,json=secureBootStatus,proto3" json:"secure_boot_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
UkiStatus map[string]uint32 `protobuf:"bytes,8,rep,name=uki_status,json=ukiStatus,proto3" json:"uki_status,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
|
||||
RegisteredMachinesLimit uint32 `protobuf:"varint,9,opt,name=registered_machines_limit,json=registeredMachinesLimit,proto3" json:"registered_machines_limit,omitempty"`
|
||||
RegistrationLimitReached bool `protobuf:"varint,10,opt,name=registration_limit_reached,json=registrationLimitReached,proto3" json:"registration_limit_reached,omitempty"`
|
||||
InvalidSchematicMachinesCount uint32 `protobuf:"varint,11,opt,name=invalid_schematic_machines_count,json=invalidSchematicMachinesCount,proto3" json:"invalid_schematic_machines_count,omitempty"`
|
||||
ApproachingTalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,12,opt,name=approaching_talos_version_end_of_support_machines_count,json=approachingTalosVersionEndOfSupportMachinesCount,proto3" json:"approaching_talos_version_end_of_support_machines_count,omitempty"`
|
||||
TalosVersionEndOfSupportMachinesCount uint32 `protobuf:"varint,13,opt,name=talos_version_end_of_support_machines_count,json=talosVersionEndOfSupportMachinesCount,proto3" json:"talos_version_end_of_support_machines_count,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *MachineStatusMetricsSpec) Reset() {
|
||||
@ -6793,6 +6795,20 @@ func (x *MachineStatusMetricsSpec) GetInvalidSchematicMachinesCount() uint32 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *MachineStatusMetricsSpec) GetApproachingTalosVersionEndOfSupportMachinesCount() uint32 {
|
||||
if x != nil {
|
||||
return x.ApproachingTalosVersionEndOfSupportMachinesCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *MachineStatusMetricsSpec) GetTalosVersionEndOfSupportMachinesCount() uint32 {
|
||||
if x != nil {
|
||||
return x.TalosVersionEndOfSupportMachinesCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ClusterMetricsSpec contains metrics about the clusters in the Omni instance.
|
||||
type ClusterMetricsSpec struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
@ -11665,7 +11681,7 @@ const file_omni_specs_omni_proto_rawDesc = "" +
|
||||
"\tInstalled\x10\x00\x12\x0e\n" +
|
||||
"\n" +
|
||||
"Installing\x10\x01\x12\f\n" +
|
||||
"\bRemoving\x10\x02\"\x8c\a\n" +
|
||||
"\bRemoving\x10\x02\"\xdb\b\n" +
|
||||
"\x18MachineStatusMetricsSpec\x12:\n" +
|
||||
"\x19registered_machines_count\x18\x01 \x01(\rR\x17registeredMachinesCount\x128\n" +
|
||||
"\x18connected_machines_count\x18\x02 \x01(\rR\x16connectedMachinesCount\x128\n" +
|
||||
@ -11678,7 +11694,9 @@ const file_omni_specs_omni_proto_rawDesc = "" +
|
||||
"\x19registered_machines_limit\x18\t \x01(\rR\x17registeredMachinesLimit\x12<\n" +
|
||||
"\x1aregistration_limit_reached\x18\n" +
|
||||
" \x01(\bR\x18registrationLimitReached\x12G\n" +
|
||||
" invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x1a<\n" +
|
||||
" invalid_schematic_machines_count\x18\v \x01(\rR\x1dinvalidSchematicMachinesCount\x12q\n" +
|
||||
"7approaching_talos_version_end_of_support_machines_count\x18\f \x01(\rR0approachingTalosVersionEndOfSupportMachinesCount\x12Z\n" +
|
||||
"+talos_version_end_of_support_machines_count\x18\r \x01(\rR%talosVersionEndOfSupportMachinesCount\x1a<\n" +
|
||||
"\x0ePlatformsEntry\x12\x10\n" +
|
||||
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
|
||||
"\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\x1aC\n" +
|
||||
|
||||
@ -1356,6 +1356,8 @@ message MachineStatusMetricsSpec {
|
||||
uint32 registered_machines_limit = 9;
|
||||
bool registration_limit_reached = 10;
|
||||
uint32 invalid_schematic_machines_count = 11;
|
||||
uint32 approaching_talos_version_end_of_support_machines_count = 12;
|
||||
uint32 talos_version_end_of_support_machines_count = 13;
|
||||
}
|
||||
|
||||
// ClusterMetricsSpec contains metrics about the clusters in the Omni instance.
|
||||
|
||||
@ -2450,6 +2450,8 @@ func (m *MachineStatusMetricsSpec) CloneVT() *MachineStatusMetricsSpec {
|
||||
r.RegisteredMachinesLimit = m.RegisteredMachinesLimit
|
||||
r.RegistrationLimitReached = m.RegistrationLimitReached
|
||||
r.InvalidSchematicMachinesCount = m.InvalidSchematicMachinesCount
|
||||
r.ApproachingTalosVersionEndOfSupportMachinesCount = m.ApproachingTalosVersionEndOfSupportMachinesCount
|
||||
r.TalosVersionEndOfSupportMachinesCount = m.TalosVersionEndOfSupportMachinesCount
|
||||
if rhs := m.Platforms; rhs != nil {
|
||||
tmpContainer := make(map[string]uint32, len(rhs))
|
||||
for k, v := range rhs {
|
||||
@ -6626,6 +6628,12 @@ func (this *MachineStatusMetricsSpec) EqualVT(that *MachineStatusMetricsSpec) bo
|
||||
if this.InvalidSchematicMachinesCount != that.InvalidSchematicMachinesCount {
|
||||
return false
|
||||
}
|
||||
if this.ApproachingTalosVersionEndOfSupportMachinesCount != that.ApproachingTalosVersionEndOfSupportMachinesCount {
|
||||
return false
|
||||
}
|
||||
if this.TalosVersionEndOfSupportMachinesCount != that.TalosVersionEndOfSupportMachinesCount {
|
||||
return false
|
||||
}
|
||||
return string(this.unknownFields) == string(that.unknownFields)
|
||||
}
|
||||
|
||||
@ -14280,6 +14288,16 @@ func (m *MachineStatusMetricsSpec) MarshalToSizedBufferVT(dAtA []byte) (int, err
|
||||
i -= len(m.unknownFields)
|
||||
copy(dAtA[i:], m.unknownFields)
|
||||
}
|
||||
if m.TalosVersionEndOfSupportMachinesCount != 0 {
|
||||
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.TalosVersionEndOfSupportMachinesCount))
|
||||
i--
|
||||
dAtA[i] = 0x68
|
||||
}
|
||||
if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 {
|
||||
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount))
|
||||
i--
|
||||
dAtA[i] = 0x60
|
||||
}
|
||||
if m.InvalidSchematicMachinesCount != 0 {
|
||||
i = protohelpers.EncodeVarint(dAtA, i, uint64(m.InvalidSchematicMachinesCount))
|
||||
i--
|
||||
@ -18939,6 +18957,12 @@ func (m *MachineStatusMetricsSpec) SizeVT() (n int) {
|
||||
if m.InvalidSchematicMachinesCount != 0 {
|
||||
n += 1 + protohelpers.SizeOfVarint(uint64(m.InvalidSchematicMachinesCount))
|
||||
}
|
||||
if m.ApproachingTalosVersionEndOfSupportMachinesCount != 0 {
|
||||
n += 1 + protohelpers.SizeOfVarint(uint64(m.ApproachingTalosVersionEndOfSupportMachinesCount))
|
||||
}
|
||||
if m.TalosVersionEndOfSupportMachinesCount != 0 {
|
||||
n += 1 + protohelpers.SizeOfVarint(uint64(m.TalosVersionEndOfSupportMachinesCount))
|
||||
}
|
||||
n += len(m.unknownFields)
|
||||
return n
|
||||
}
|
||||
@ -36756,6 +36780,44 @@ func (m *MachineStatusMetricsSpec) UnmarshalVT(dAtA []byte) error {
|
||||
break
|
||||
}
|
||||
}
|
||||
case 12:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field ApproachingTalosVersionEndOfSupportMachinesCount", wireType)
|
||||
}
|
||||
m.ApproachingTalosVersionEndOfSupportMachinesCount = 0
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if shift >= 64 {
|
||||
return protohelpers.ErrIntOverflow
|
||||
}
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := dAtA[iNdEx]
|
||||
iNdEx++
|
||||
m.ApproachingTalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
case 13:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field TalosVersionEndOfSupportMachinesCount", wireType)
|
||||
}
|
||||
m.TalosVersionEndOfSupportMachinesCount = 0
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if shift >= 64 {
|
||||
return protohelpers.ErrIntOverflow
|
||||
}
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := dAtA[iNdEx]
|
||||
iNdEx++
|
||||
m.TalosVersionEndOfSupportMachinesCount |= uint32(b&0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
default:
|
||||
iNdEx = preIndex
|
||||
skippy, err := protohelpers.Skip(dAtA[iNdEx:])
|
||||
|
||||
@ -34,6 +34,14 @@ const (
|
||||
// NotificationNonImageFactoryMachinesID is the ID for the non-ImageFactory machines deprecation notification.
|
||||
// tsgen:NotificationNonImageFactoryMachinesID
|
||||
NotificationNonImageFactoryMachinesID = "non-image-factory-machines"
|
||||
|
||||
// NotificationApproachingTalosVersionEndOfSupportID is the ID for the notification shown when machines are approaching Talos version end of support.
|
||||
// tsgen:NotificationApproachingTalosVersionEndOfSupportID
|
||||
NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support"
|
||||
|
||||
// NotificationTalosVersionEndOfSupportID is the ID for the notification shown when machines have reached Talos version end of support.
|
||||
// tsgen:NotificationTalosVersionEndOfSupportID
|
||||
NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support"
|
||||
)
|
||||
|
||||
// Notification describes a generic notification emitted by a controller.
|
||||
|
||||
@ -164,7 +164,6 @@ func buildRootCommand() (*cobra.Command, error) {
|
||||
defineStorageFlags(rootCmd, rootCmdFlagBinder, flagConfig)
|
||||
defineRegistriesFlags(rootCmdFlagBinder, flagConfig)
|
||||
defineFeatureFlags(rootCmdFlagBinder, flagConfig)
|
||||
defineNotificationFlags(rootCmdFlagBinder, flagConfig)
|
||||
defineDebugFlags(rootCmdFlagBinder, flagConfig)
|
||||
defineEtcdBackupsFlags(rootCmd, rootCmdFlagBinder, flagConfig)
|
||||
defineEulaFlags(rootCmd, rootCmdFlagBinder, flagConfig)
|
||||
@ -353,12 +352,6 @@ func defineFeatureFlags(b *FlagBinder, flagConfig *config.Params) {
|
||||
b.BoolVar("features.enableClusterImport", &flagConfig.Features.EnableClusterImport)
|
||||
}
|
||||
|
||||
func defineNotificationFlags(b *FlagBinder, flagConfig *config.Params) {
|
||||
b.BoolVar("notifications.nonImageFactoryDeprecation.enabled", &flagConfig.Notifications.NonImageFactoryDeprecation.Enabled)
|
||||
b.StringVar("notifications.nonImageFactoryDeprecation.title", &flagConfig.Notifications.NonImageFactoryDeprecation.Title)
|
||||
b.StringVar("notifications.nonImageFactoryDeprecation.body", &flagConfig.Notifications.NonImageFactoryDeprecation.Body)
|
||||
}
|
||||
|
||||
func defineDebugFlags(b *FlagBinder, flagConfig *config.Params) {
|
||||
b.StringVar("debug.pprof.endpoint", &flagConfig.Debug.Pprof.Endpoint)
|
||||
b.StringVar("debug.server.endpoint", &flagConfig.Debug.Server.Endpoint)
|
||||
|
||||
@ -928,6 +928,8 @@ export type MachineStatusMetricsSpec = {
|
||||
registered_machines_limit?: number
|
||||
registration_limit_reached?: boolean
|
||||
invalid_schematic_machines_count?: number
|
||||
approaching_talos_version_end_of_support_machines_count?: number
|
||||
talos_version_end_of_support_machines_count?: number
|
||||
}
|
||||
|
||||
export type ClusterMetricsSpec = {
|
||||
|
||||
@ -214,6 +214,8 @@ export const NodeForceDestroyRequestType = "NodeForceDestroyRequests.omni.sidero
|
||||
export const NotificationType = "Notifications.omni.sidero.dev";
|
||||
export const NotificationMachineRegistrationLimitID = "machine-registration-limit";
|
||||
export const NotificationNonImageFactoryMachinesID = "non-image-factory-machines";
|
||||
export const NotificationApproachingTalosVersionEndOfSupportID = "approaching-talos-version-end-of-support";
|
||||
export const NotificationTalosVersionEndOfSupportID = "talos-version-end-of-support";
|
||||
export const OngoingTaskType = "OngoingTasks.omni.sidero.dev";
|
||||
export const RedactedClusterMachineConfigType = "RedactedClusterMachineConfigs.omni.sidero.dev";
|
||||
export const RotateKubernetesCAType = "RotateKubernetesCAs.omni.sidero.dev";
|
||||
|
||||
@ -7,7 +7,7 @@ import { createWatchStreamHandler } from '@msw/helpers'
|
||||
import type { Meta, StoryObj } from '@storybook/vue3-vite'
|
||||
|
||||
import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb'
|
||||
import { DefaultNamespace, NotificationType } from '@/api/resources'
|
||||
import { EphemeralNamespace, NotificationType } from '@/api/resources'
|
||||
|
||||
import THeader from './THeader.vue'
|
||||
|
||||
@ -24,13 +24,13 @@ export const Default: Story = {
|
||||
handlers: [
|
||||
createWatchStreamHandler<NotificationSpec>({
|
||||
expectedOptions: {
|
||||
namespace: DefaultNamespace,
|
||||
namespace: EphemeralNamespace,
|
||||
type: NotificationType,
|
||||
},
|
||||
initialResources: faker.helpers.multiple(
|
||||
() => ({
|
||||
metadata: {
|
||||
namespace: DefaultNamespace,
|
||||
namespace: EphemeralNamespace,
|
||||
type: NotificationType,
|
||||
id: faker.string.uuid(),
|
||||
},
|
||||
|
||||
@ -10,7 +10,7 @@ import { computed, ref } from 'vue'
|
||||
|
||||
import { Runtime } from '@/api/common/omni.pb'
|
||||
import { type NotificationSpec, NotificationSpecType } from '@/api/omni/specs/omni.pb'
|
||||
import { DefaultNamespace, NotificationType } from '@/api/resources'
|
||||
import { EphemeralNamespace, NotificationType } from '@/api/resources'
|
||||
import IconButton from '@/components/Button/IconButton.vue'
|
||||
import TButton from '@/components/Button/TButton.vue'
|
||||
import TIcon, { type IconType } from '@/components/Icon/TIcon.vue'
|
||||
@ -28,7 +28,7 @@ defineEmits<{ toggleSidebar: [] }>()
|
||||
const { data } = useResourceWatch<NotificationSpec>({
|
||||
runtime: Runtime.Omni,
|
||||
resource: {
|
||||
namespace: DefaultNamespace,
|
||||
namespace: EphemeralNamespace,
|
||||
type: NotificationType,
|
||||
},
|
||||
})
|
||||
|
||||
@ -10,9 +10,11 @@ import (
|
||||
"fmt"
|
||||
"iter"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/blang/semver/v4"
|
||||
"github.com/cosi-project/runtime/pkg/controller"
|
||||
"github.com/cosi-project/runtime/pkg/resource"
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
@ -22,6 +24,7 @@ import (
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/siderolabs/omni/client/api/omni/specs"
|
||||
"github.com/siderolabs/omni/client/pkg/constants"
|
||||
"github.com/siderolabs/omni/client/pkg/omni/resources"
|
||||
"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
|
||||
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
|
||||
@ -34,18 +37,34 @@ type nodeInfo struct {
|
||||
connected bool
|
||||
}
|
||||
|
||||
// NonImageFactoryDeprecationConfig contains configuration for the non-ImageFactory deprecation notification.
|
||||
type NonImageFactoryDeprecationConfig struct {
|
||||
Title string
|
||||
Body string
|
||||
Enabled bool
|
||||
}
|
||||
const (
|
||||
// NonImageFactoryNotificationEnabled controls whether a notification is shown for non-ImageFactory machines.
|
||||
// This will be flipped to true in a future release.
|
||||
NonImageFactoryNotificationEnabled = false
|
||||
|
||||
// UnsupportedTalosVersionNotificationEnabled controls whether notifications are shown for unsupported Talos versions.
|
||||
UnsupportedTalosVersionNotificationEnabled = true
|
||||
|
||||
// talosVersionSupportPolicyDocsURL is the documentation link appended to both Talos end-of-support notification bodies.
talosVersionSupportPolicyDocsURL = "https://docs.siderolabs.com/omni/getting-started/talos-version-support-policy"
|
||||
|
||||
// nonImageFactoryNotificationTitle and nonImageFactoryNotificationBody are the texts of the non-ImageFactory notification.
// The body is a format string taking one argument: the machine count (%d).
nonImageFactoryNotificationTitle = "Non-ImageFactory Machines Detected"
|
||||
nonImageFactoryNotificationBody = "%d machine(s) were provisioned without ImageFactory." +
|
||||
" Omni will refuse to start with non-ImageFactory machines in a future release." +
|
||||
" Please re-provision them using ImageFactory."
|
||||
|
||||
// approachingTalosEndOfSupportNotificationTitle and approachingTalosEndOfSupportNotificationBody are the texts of the
// "approaching end of support" notification. The body is a format string taking two arguments:
// the machine count (%d) and the minimum supported Talos version (%s).
approachingTalosEndOfSupportNotificationTitle = "Talos Version Approaching End of Support"
|
||||
approachingTalosEndOfSupportNotificationBody = "%d machine(s) are running a Talos version that will lose Omni support soon." +
|
||||
" The minimum supported version is %s. Please upgrade: " + talosVersionSupportPolicyDocsURL
|
||||
|
||||
// talosVersionEndOfSupportNotificationTitle and talosVersionEndOfSupportNotificationBody are the texts of the
// "end of support reached" notification. The body is a format string taking two arguments:
// the machine count (%d) and the minimum supported Talos version (%s).
talosVersionEndOfSupportNotificationTitle = "Unsupported Talos Version Detected"
|
||||
talosVersionEndOfSupportNotificationBody = "%d machine(s) are running unsupported Talos versions (below %s)." +
|
||||
" Please upgrade immediately: " + talosVersionSupportPolicyDocsURL
|
||||
)
|
||||
|
||||
// NewMachineStatusMetricsController creates a new MachineStatusMetricsController.
|
||||
func NewMachineStatusMetricsController(maxRegisteredMachines uint32, nonImageFactoryDeprecation NonImageFactoryDeprecationConfig) *MachineStatusMetricsController {
|
||||
func NewMachineStatusMetricsController(maxRegisteredMachines uint32) *MachineStatusMetricsController {
|
||||
return &MachineStatusMetricsController{
|
||||
maxRegisteredMachines: maxRegisteredMachines,
|
||||
nonImageFactoryDeprecation: nonImageFactoryDeprecation,
|
||||
maxRegisteredMachines: maxRegisteredMachines,
|
||||
}
|
||||
}
|
||||
|
||||
@ -58,19 +77,19 @@ type MachineStatusMetricsController struct {
|
||||
|
||||
metricsOnce sync.Once
|
||||
|
||||
nonImageFactoryDeprecation NonImageFactoryDeprecationConfig
|
||||
|
||||
maxRegisteredMachines uint32
|
||||
|
||||
platformNames []string
|
||||
|
||||
metricNumMachines prometheus.Gauge
|
||||
metricNumConnectedMachines prometheus.Gauge
|
||||
metricNumInvalidSchematicMachines prometheus.Gauge
|
||||
metricNumMachinesPerVersion *prometheus.Desc
|
||||
metricMachinePlatforms *prometheus.GaugeVec
|
||||
metricMachineSecureBootStatus *prometheus.GaugeVec
|
||||
metricMachineUKIStatus *prometheus.GaugeVec
|
||||
metricNumMachines prometheus.Gauge
|
||||
metricNumConnectedMachines prometheus.Gauge
|
||||
metricNumInvalidSchematicMachines prometheus.Gauge
|
||||
metricNumApproachingTalosVersionEndOfSupportMachines prometheus.Gauge
|
||||
metricNumTalosVersionEndOfSupportMachines prometheus.Gauge
|
||||
metricNumMachinesPerVersion *prometheus.Desc
|
||||
metricMachinePlatforms *prometheus.GaugeVec
|
||||
metricMachineSecureBootStatus *prometheus.GaugeVec
|
||||
metricMachineUKIStatus *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Name implements controller.Controller interface.
|
||||
@ -125,6 +144,16 @@ func (ctrl *MachineStatusMetricsController) initMetrics() {
|
||||
Help: "Number of machines in the instance that were provisioned without using ImageFactory.",
|
||||
})
|
||||
|
||||
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "omni_machines_approaching_talos_version_end_of_support",
|
||||
Help: "Number of machines running a Talos version at or near the minimum supported version.",
|
||||
})
|
||||
|
||||
ctrl.metricNumTalosVersionEndOfSupportMachines = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "omni_machines_talos_version_end_of_support",
|
||||
Help: "Number of machines running a Talos version below the minimum supported version.",
|
||||
})
|
||||
|
||||
ctrl.metricNumMachinesPerVersion = prometheus.NewDesc(
|
||||
"omni_machines_version",
|
||||
"Number of machines in the instance by version.",
|
||||
@ -198,12 +227,22 @@ func (ctrl *MachineStatusMetricsController) Run(ctx context.Context, r controlle
|
||||
}
|
||||
}
|
||||
|
||||
if ctrl.nonImageFactoryDeprecation.Enabled {
|
||||
if NonImageFactoryNotificationEnabled {
|
||||
if err = ctrl.reconcileNonImageFactoryDeprecationNotification(ctx, r, metricsSpec); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if UnsupportedTalosVersionNotificationEnabled {
|
||||
if err = ctrl.reconcileApproachingTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = ctrl.reconcileTalosVersionEndOfSupportNotification(ctx, r, metricsSpec); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
@ -237,8 +276,8 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN
|
||||
if invalidSchematicCount > 0 {
|
||||
return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationNonImageFactoryMachinesID),
|
||||
func(res *omni.Notification) error {
|
||||
res.TypedSpec().Value.Title = ctrl.nonImageFactoryDeprecation.Title
|
||||
res.TypedSpec().Value.Body = fmt.Sprintf(ctrl.nonImageFactoryDeprecation.Body, invalidSchematicCount)
|
||||
res.TypedSpec().Value.Title = nonImageFactoryNotificationTitle
|
||||
res.TypedSpec().Value.Body = fmt.Sprintf(nonImageFactoryNotificationBody, invalidSchematicCount)
|
||||
res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING
|
||||
|
||||
return nil
|
||||
@ -251,6 +290,44 @@ func (ctrl *MachineStatusMetricsController) reconcileNonImageFactoryDeprecationN
|
||||
return err
|
||||
}
|
||||
|
||||
func (ctrl *MachineStatusMetricsController) reconcileApproachingTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error {
|
||||
count := int(metricsSpec.ApproachingTalosVersionEndOfSupportMachinesCount)
|
||||
if count > 0 {
|
||||
return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationApproachingTalosVersionEndOfSupportID),
|
||||
func(res *omni.Notification) error {
|
||||
res.TypedSpec().Value.Title = approachingTalosEndOfSupportNotificationTitle
|
||||
res.TypedSpec().Value.Body = fmt.Sprintf(approachingTalosEndOfSupportNotificationBody, count, constants.MinTalosVersion)
|
||||
res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING
|
||||
|
||||
return nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
_, err := helpers.TeardownAndDestroy(ctx, r, omni.NewNotification(omni.NotificationApproachingTalosVersionEndOfSupportID).Metadata())
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (ctrl *MachineStatusMetricsController) reconcileTalosVersionEndOfSupportNotification(ctx context.Context, r controller.Runtime, metricsSpec *specs.MachineStatusMetricsSpec) error {
|
||||
count := int(metricsSpec.TalosVersionEndOfSupportMachinesCount)
|
||||
if count > 0 {
|
||||
return safe.WriterModify(ctx, r, omni.NewNotification(omni.NotificationTalosVersionEndOfSupportID),
|
||||
func(res *omni.Notification) error {
|
||||
res.TypedSpec().Value.Title = talosVersionEndOfSupportNotificationTitle
|
||||
res.TypedSpec().Value.Body = fmt.Sprintf(talosVersionEndOfSupportNotificationBody, count, constants.MinTalosVersion)
|
||||
res.TypedSpec().Value.Type = specs.NotificationSpec_WARNING
|
||||
|
||||
return nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
_, err := helpers.TeardownAndDestroy(ctx, r, omni.NewNotification(omni.NotificationTalosVersionEndOfSupportID).Metadata())
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omni.MachineStatus], numPendingMachines int) *specs.MachineStatusMetricsSpec {
|
||||
platformMetrics := make(map[string]uint32, len(ctrl.platformNames))
|
||||
for _, p := range ctrl.platformNames {
|
||||
@ -270,7 +347,14 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
|
||||
"false": 0,
|
||||
}
|
||||
|
||||
var machines, connectedMachines, allocatedMachines, invalidSchematicMachines int
|
||||
minTalosVer := semver.MustParse(constants.MinTalosVersion)
|
||||
// Machines at or above MinTalosVersion but below the release two minor versions above it (i.e. on the minimum minor or the next one) are "approaching Talos end of support".
|
||||
approachingThreshold := semver.Version{Major: minTalosVer.Major, Minor: minTalosVer.Minor + 2}
|
||||
|
||||
var (
|
||||
machines, connectedMachines, allocatedMachines, invalidSchematicMachines int
|
||||
approachingEndOfSupportMachines, endOfSupportTalosVersionMachines int
|
||||
)
|
||||
|
||||
ctrl.versionsMu.Lock()
|
||||
ctrl.versionsMap = map[nodeInfo]int32{}
|
||||
@ -282,12 +366,25 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
|
||||
connectedMachines++
|
||||
}
|
||||
|
||||
if ms.TypedSpec().Value.TalosVersion != "" {
|
||||
talosVersion := ms.TypedSpec().Value.TalosVersion
|
||||
|
||||
if talosVersion != "" {
|
||||
ctrl.versionsMap[nodeInfo{
|
||||
talosVersion: ms.TypedSpec().Value.TalosVersion,
|
||||
talosVersion: talosVersion,
|
||||
cluster: ms.TypedSpec().Value.Cluster,
|
||||
connected: ms.TypedSpec().Value.Connected,
|
||||
}]++
|
||||
|
||||
if ver, err := semver.ParseTolerant(strings.TrimLeft(talosVersion, "v")); err == nil {
|
||||
ver.Pre = nil
|
||||
|
||||
switch {
|
||||
case ver.LT(minTalosVer):
|
||||
endOfSupportTalosVersionMachines++
|
||||
case ver.LT(approachingThreshold):
|
||||
approachingEndOfSupportMachines++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ms.TypedSpec().Value.Cluster != "" {
|
||||
@ -318,6 +415,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
|
||||
ctrl.metricNumMachines.Set(float64(machines))
|
||||
ctrl.metricNumConnectedMachines.Set(float64(connectedMachines))
|
||||
ctrl.metricNumInvalidSchematicMachines.Set(float64(invalidSchematicMachines))
|
||||
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Set(float64(approachingEndOfSupportMachines))
|
||||
ctrl.metricNumTalosVersionEndOfSupportMachines.Set(float64(endOfSupportTalosVersionMachines))
|
||||
|
||||
for key, num := range platformMetrics {
|
||||
ctrl.metricMachinePlatforms.WithLabelValues(key).Set(float64(num))
|
||||
@ -342,6 +441,8 @@ func (ctrl *MachineStatusMetricsController) gatherMetrics(statuses iter.Seq[*omn
|
||||
RegisteredMachinesLimit: ctrl.maxRegisteredMachines,
|
||||
RegistrationLimitReached: ctrl.maxRegisteredMachines > 0 && uint32(machines) >= ctrl.maxRegisteredMachines,
|
||||
InvalidSchematicMachinesCount: uint32(invalidSchematicMachines),
|
||||
ApproachingTalosVersionEndOfSupportMachinesCount: uint32(approachingEndOfSupportMachines),
|
||||
TalosVersionEndOfSupportMachinesCount: uint32(endOfSupportTalosVersionMachines),
|
||||
}
|
||||
}
|
||||
|
||||
@ -365,6 +466,8 @@ func (ctrl *MachineStatusMetricsController) Collect(ch chan<- prometheus.Metric)
|
||||
ctrl.metricNumMachines.Collect(ch)
|
||||
ctrl.metricNumConnectedMachines.Collect(ch)
|
||||
ctrl.metricNumInvalidSchematicMachines.Collect(ch)
|
||||
ctrl.metricNumApproachingTalosVersionEndOfSupportMachines.Collect(ch)
|
||||
ctrl.metricNumTalosVersionEndOfSupportMachines.Collect(ch)
|
||||
ctrl.metricMachinePlatforms.Collect(ch)
|
||||
ctrl.metricMachineSecureBootStatus.Collect(ch)
|
||||
ctrl.metricMachineUKIStatus.Collect(ch)
|
||||
|
||||
@ -22,14 +22,6 @@ import (
|
||||
"github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/testutils"
|
||||
)
|
||||
|
||||
func newNonImageFactoryDeprecationConfig(enabled bool) omnictrl.NonImageFactoryDeprecationConfig {
|
||||
return omnictrl.NonImageFactoryDeprecationConfig{
|
||||
Enabled: enabled,
|
||||
Title: "Non-ImageFactory Machines Detected",
|
||||
Body: "%d machine(s) were provisioned without ImageFactory.",
|
||||
}
|
||||
}
|
||||
|
||||
func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@ -82,7 +74,7 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
|
||||
|
||||
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
|
||||
func(_ context.Context, tc testutils.TestContext) {
|
||||
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered, omnictrl.NonImageFactoryDeprecationConfig{})))
|
||||
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(tt.maxRegistered)))
|
||||
},
|
||||
func(ctx context.Context, tc testutils.TestContext) {
|
||||
for _, id := range tt.machineIDs {
|
||||
@ -113,43 +105,42 @@ func TestMachineStatusMetricsController_RegistrationLimit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T) {
|
||||
func TestMachineStatusMetricsController_UnsupportedTalosVersion(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
require.True(t, omnictrl.UnsupportedTalosVersionNotificationEnabled, "this test assumes UnsupportedTalosVersionNotificationEnabled is true")
|
||||
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
invalidSchematicIDs []string
|
||||
validSchematicIDs []string
|
||||
enabled bool
|
||||
expectNotification bool
|
||||
expectCount int
|
||||
machineVersions map[string]string
|
||||
name string
|
||||
expectApproachingCount int
|
||||
expectEndOfSupportCount int
|
||||
expectApproaching bool
|
||||
expectEndOfSupport bool
|
||||
}{
|
||||
{
|
||||
name: "disabled, invalid machines present",
|
||||
invalidSchematicIDs: []string{"m1"},
|
||||
enabled: false,
|
||||
expectNotification: false,
|
||||
name: "all machines above threshold",
|
||||
machineVersions: map[string]string{"m1": "v1.10.0", "m2": "v1.11.0"},
|
||||
},
|
||||
{
|
||||
name: "enabled, no invalid machines",
|
||||
validSchematicIDs: []string{"m1", "m2"},
|
||||
enabled: true,
|
||||
expectNotification: false,
|
||||
name: "machines approaching end of support",
|
||||
machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.9.0", "m3": "v1.11.0"},
|
||||
expectApproaching: true,
|
||||
expectApproachingCount: 2,
|
||||
},
|
||||
{
|
||||
name: "enabled, some invalid machines",
|
||||
invalidSchematicIDs: []string{"m1", "m2"},
|
||||
validSchematicIDs: []string{"m3"},
|
||||
enabled: true,
|
||||
expectNotification: true,
|
||||
expectCount: 2,
|
||||
name: "machines past end of support",
|
||||
machineVersions: map[string]string{"m1": "v1.7.0", "m2": "v1.6.0", "m3": "v1.11.0"},
|
||||
expectEndOfSupport: true,
|
||||
expectEndOfSupportCount: 2,
|
||||
},
|
||||
{
|
||||
name: "enabled, all invalid machines",
|
||||
invalidSchematicIDs: []string{"m1", "m2", "m3"},
|
||||
enabled: true,
|
||||
expectNotification: true,
|
||||
expectCount: 3,
|
||||
name: "mix of approaching and past end of support",
|
||||
machineVersions: map[string]string{"m1": "v1.8.0", "m2": "v1.7.0", "m3": "v1.11.0"},
|
||||
expectApproaching: true,
|
||||
expectApproachingCount: 1,
|
||||
expectEndOfSupport: true,
|
||||
expectEndOfSupportCount: 1,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
@ -160,40 +151,35 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T)
|
||||
|
||||
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
|
||||
func(_ context.Context, tc testutils.TestContext) {
|
||||
require.NoError(t, tc.Runtime.RegisterController(
|
||||
omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(tt.enabled)),
|
||||
))
|
||||
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0)))
|
||||
},
|
||||
func(ctx context.Context, tc testutils.TestContext) {
|
||||
for _, id := range tt.invalidSchematicIDs {
|
||||
for id, version := range tt.machineVersions {
|
||||
ms := omni.NewMachineStatus(id)
|
||||
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Invalid: true,
|
||||
}
|
||||
ms.TypedSpec().Value.TalosVersion = version
|
||||
|
||||
require.NoError(t, tc.State.Create(ctx, ms))
|
||||
}
|
||||
|
||||
for _, id := range tt.validSchematicIDs {
|
||||
ms := omni.NewMachineStatus(id)
|
||||
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: "valid-id",
|
||||
FullId: "valid-full-id",
|
||||
}
|
||||
|
||||
require.NoError(t, tc.State.Create(ctx, ms))
|
||||
}
|
||||
|
||||
if tt.expectNotification {
|
||||
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) {
|
||||
a.Equal("Non-ImageFactory Machines Detected", res.TypedSpec().Value.Title)
|
||||
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectCount))
|
||||
if tt.expectApproaching {
|
||||
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
|
||||
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectApproachingCount))
|
||||
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
|
||||
})
|
||||
} else {
|
||||
// Notification should not exist. Sleep briefly since there is no state change to poll on.
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID)
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID)
|
||||
}
|
||||
|
||||
if tt.expectEndOfSupport {
|
||||
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
|
||||
a.Contains(res.TypedSpec().Value.Body, fmt.Sprintf("%d machine(s)", tt.expectEndOfSupportCount))
|
||||
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
|
||||
})
|
||||
} else {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID)
|
||||
}
|
||||
},
|
||||
)
|
||||
@ -201,7 +187,7 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecation(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *testing.T) {
|
||||
func TestMachineStatusMetricsController_UnsupportedTalosVersionTeardown(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second)
|
||||
@ -209,35 +195,27 @@ func TestMachineStatusMetricsController_NonImageFactoryDeprecationTeardown(t *te
|
||||
|
||||
testutils.WithRuntime(ctx, t, testutils.TestOptions{},
|
||||
func(_ context.Context, tc testutils.TestContext) {
|
||||
require.NoError(t, tc.Runtime.RegisterController(
|
||||
omnictrl.NewMachineStatusMetricsController(0, newNonImageFactoryDeprecationConfig(true)),
|
||||
))
|
||||
require.NoError(t, tc.Runtime.RegisterController(omnictrl.NewMachineStatusMetricsController(0)))
|
||||
},
|
||||
func(ctx context.Context, tc testutils.TestContext) {
|
||||
// Create a machine with invalid schematic.
|
||||
ms := omni.NewMachineStatus("m1")
|
||||
ms.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{Invalid: true}
|
||||
ms.TypedSpec().Value.TalosVersion = "v1.7.0"
|
||||
|
||||
require.NoError(t, tc.State.Create(ctx, ms))
|
||||
|
||||
// Wait for the notification to appear.
|
||||
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID, func(res *omni.Notification, a *assert.Assertions) {
|
||||
rtestutils.AssertResource(ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID, func(res *omni.Notification, a *assert.Assertions) {
|
||||
a.Equal(specs.NotificationSpec_WARNING, res.TypedSpec().Value.Type)
|
||||
})
|
||||
|
||||
// Fix the machine schematic (no longer invalid).
|
||||
_, err := safe.StateUpdateWithConflicts(ctx, tc.State, ms.Metadata(), func(res *omni.MachineStatus) error {
|
||||
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: "valid-id",
|
||||
FullId: "valid-full-id",
|
||||
}
|
||||
res.TypedSpec().Value.TalosVersion = "v1.11.0"
|
||||
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Notification should be torn down.
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationNonImageFactoryMachinesID)
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationTalosVersionEndOfSupportID)
|
||||
rtestutils.AssertNoResource[*omni.Notification](ctx, t, tc.State, omni.NotificationApproachingTalosVersionEndOfSupportID)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@ -172,11 +172,7 @@ func NewRuntime(cfg *config.Params, talosClientFactory *talos.ClientFactory, dns
|
||||
LBConfig: cfg.Services.LoadBalancer,
|
||||
},
|
||||
omnictrl.NewMachineCleanupController(),
|
||||
omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines(), omnictrl.NonImageFactoryDeprecationConfig{
|
||||
Enabled: cfg.Notifications.NonImageFactoryDeprecation.GetEnabled(),
|
||||
Title: cfg.Notifications.NonImageFactoryDeprecation.GetTitle(),
|
||||
Body: cfg.Notifications.NonImageFactoryDeprecation.GetBody(),
|
||||
}),
|
||||
omnictrl.NewMachineStatusMetricsController(cfg.Account.GetMaxRegisteredMachines()),
|
||||
omnictrl.NewVersionsController(cfg.Registries.GetImageFactoryBaseURL(), cfg.Features.GetEnableTalosPreReleaseVersions(), cfg.Registries.GetKubernetes()),
|
||||
omnictrl.NewClusterLoadBalancerController(
|
||||
cfg.Services.LoadBalancer.GetMinPort(),
|
||||
|
||||
@ -919,39 +919,6 @@ func (s *LogsStripe) SetMinCommit(v uint32) {
|
||||
s.MinCommit = &v
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) GetBody() string {
|
||||
if s == nil || s.Body == nil {
|
||||
return *new(string)
|
||||
}
|
||||
return *s.Body
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) SetBody(v string) {
|
||||
s.Body = &v
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) GetEnabled() bool {
|
||||
if s == nil || s.Enabled == nil {
|
||||
return *new(bool)
|
||||
}
|
||||
return *s.Enabled
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) SetEnabled(v bool) {
|
||||
s.Enabled = &v
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) GetTitle() string {
|
||||
if s == nil || s.Title == nil {
|
||||
return *new(string)
|
||||
}
|
||||
return *s.Title
|
||||
}
|
||||
|
||||
func (s *NonImageFactoryDeprecation) SetTitle(v string) {
|
||||
s.Title = &v
|
||||
}
|
||||
|
||||
func (s *OIDC) GetAllowUnverifiedEmail() bool {
|
||||
if s == nil || s.AllowUnverifiedEmail == nil {
|
||||
return *new(bool)
|
||||
|
||||
@ -30,7 +30,17 @@ import (
|
||||
"github.com/siderolabs/omni/internal/pkg/jsonschema"
|
||||
)
|
||||
|
||||
const wireguardDefaultPort = "50180"
|
||||
const (
|
||||
wireguardDefaultPort = "50180"
|
||||
|
||||
// UnsupportedTalosVersionFailOnStart controls whether Omni refuses to start when machines
|
||||
// are running Talos versions below MinTalosVersion. This will be flipped to true in a future release.
|
||||
UnsupportedTalosVersionFailOnStart = false
|
||||
|
||||
// NonImageFactoryFailOnStart controls whether Omni refuses to start when machines
|
||||
// were provisioned without ImageFactory. This will be flipped to true in a future release.
|
||||
NonImageFactoryFailOnStart = false
|
||||
)
|
||||
|
||||
//go:embed schema.json
|
||||
var schemaData string
|
||||
@ -164,6 +174,18 @@ func (p *Params) ValidateState(ctx context.Context, st state.State) error {
|
||||
}
|
||||
}
|
||||
|
||||
if UnsupportedTalosVersionFailOnStart {
|
||||
if err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if NonImageFactoryFailOnStart {
|
||||
if err := validations.EnsureNoNonImageFactoryMachines(ctx, st); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@ -564,10 +564,4 @@ func TestSchemaDefaults(t *testing.T) {
|
||||
// features
|
||||
assert.True(t, p.Features.GetEnableConfigDataCompression())
|
||||
assert.True(t, p.Features.GetEnableClusterImport())
|
||||
|
||||
// notifications.nonImageFactoryDeprecation
|
||||
assert.False(t, p.Notifications.NonImageFactoryDeprecation.GetEnabled())
|
||||
assert.Equal(t, "Non-ImageFactory Machines Detected", p.Notifications.NonImageFactoryDeprecation.GetTitle())
|
||||
assert.Equal(t, "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.",
|
||||
p.Notifications.NonImageFactoryDeprecation.GetBody())
|
||||
}
|
||||
|
||||
@ -13,7 +13,6 @@
|
||||
"registries",
|
||||
"debug",
|
||||
"features",
|
||||
"notifications",
|
||||
"eulaAccept"
|
||||
],
|
||||
"goJSONSchema": {
|
||||
@ -58,10 +57,6 @@
|
||||
"description": "Features contains feature flags to enable/disable various Omni features.",
|
||||
"$ref": "#/definitions/Features"
|
||||
},
|
||||
"notifications": {
|
||||
"description": "Notifications contains configuration for system notifications emitted by controllers.",
|
||||
"$ref": "#/definitions/Notifications"
|
||||
},
|
||||
"eulaAccept": {
|
||||
"description": "EulaAccept contains the identity of the person accepting the EULA via CLI flags or config.",
|
||||
"$ref": "#/definitions/EulaAccept"
|
||||
@ -1729,50 +1724,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"Notifications": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"nonImageFactoryDeprecation"
|
||||
],
|
||||
"properties": {
|
||||
"nonImageFactoryDeprecation": {
|
||||
"description": "NonImageFactoryDeprecation contains configuration for the notification shown when machines are provisioned without using ImageFactory.",
|
||||
"$ref": "#/definitions/NonImageFactoryDeprecation"
|
||||
}
|
||||
}
|
||||
},
|
||||
"NonImageFactoryDeprecation": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"description": "Enabled controls whether the non-ImageFactory deprecation notification is shown when machines with invalid schematics are detected.",
|
||||
"x-cli-flag": "non-image-factory-deprecation-enabled",
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"goJSONSchema": {
|
||||
"pointer": true
|
||||
}
|
||||
},
|
||||
"title": {
|
||||
"description": "Title is the title of the non-ImageFactory deprecation notification.",
|
||||
"x-cli-flag": "non-image-factory-deprecation-title",
|
||||
"type": "string",
|
||||
"default": "Non-ImageFactory Machines Detected",
|
||||
"goJSONSchema": {
|
||||
"pointer": true
|
||||
}
|
||||
},
|
||||
"body": {
|
||||
"description": "Body is the body of the non-ImageFactory deprecation notification. Use %d as a placeholder for the number of affected machines.",
|
||||
"x-cli-flag": "non-image-factory-deprecation-body",
|
||||
"type": "string",
|
||||
"default": "%d machine(s) were provisioned without ImageFactory. Support for these machines will end after a future release. Please re-provision them using ImageFactory.",
|
||||
"goJSONSchema": {
|
||||
"pointer": true
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"EulaAccept": {
|
||||
"type": "object",
|
||||
"dependentRequired": {
|
||||
|
||||
@ -434,25 +434,6 @@ type LogsStripe struct {
|
||||
MinCommit *uint32 `json:"minCommit,omitempty,omitzero" yaml:"minCommit,omitempty"`
|
||||
}
|
||||
|
||||
// NonImageFactoryDeprecation holds the settings of the non-ImageFactory deprecation
// notification. All fields are optional pointers; a nil pointer means the value is unset.
type NonImageFactoryDeprecation struct {
|
||||
// Body is the body of the non-ImageFactory deprecation notification. Use %d as a
|
||||
// placeholder for the number of affected machines.
|
||||
Body *string `json:"body,omitempty,omitzero" yaml:"body,omitempty"`
|
||||
|
||||
// Enabled controls whether the non-ImageFactory deprecation notification is shown
|
||||
// when machines with invalid schematics are detected.
|
||||
Enabled *bool `json:"enabled,omitempty,omitzero" yaml:"enabled,omitempty"`
|
||||
|
||||
// Title is the title of the non-ImageFactory deprecation notification.
|
||||
Title *string `json:"title,omitempty,omitzero" yaml:"title,omitempty"`
|
||||
}
|
||||
|
||||
// Notifications groups the configuration of the notifications; its only field is the
// non-ImageFactory deprecation notification.
type Notifications struct {
|
||||
// NonImageFactoryDeprecation contains configuration for the notification shown
|
||||
// when machines are provisioned without using ImageFactory.
|
||||
NonImageFactoryDeprecation NonImageFactoryDeprecation `json:"nonImageFactoryDeprecation" yaml:"nonImageFactoryDeprecation"`
|
||||
}
|
||||
|
||||
type OIDC struct {
|
||||
// AllowUnverifiedEmail controls whether users with unverified emails (without
|
||||
// email_verified claim) are allowed to authenticate.
|
||||
@ -501,10 +482,6 @@ type Params struct {
|
||||
// Logs contains logging-related configuration.
|
||||
Logs Logs `json:"logs" yaml:"logs"`
|
||||
|
||||
// Notifications contains configuration for system notifications emitted by
|
||||
// controllers.
|
||||
Notifications Notifications `json:"notifications" yaml:"notifications"`
|
||||
|
||||
// Registries contains container image registries configuration.
|
||||
Registries Registries `json:"registries" yaml:"registries"`
|
||||
|
||||
|
||||
41
internal/pkg/config/validations/non_image_factory.go
Normal file
41
internal/pkg/config/validations/non_image_factory.go
Normal file
@ -0,0 +1,41 @@
|
||||
// Copyright (c) 2026 Sidero Labs, Inc.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the LICENSE file.
|
||||
|
||||
package validations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
"github.com/cosi-project/runtime/pkg/state"
|
||||
"github.com/gertd/go-pluralize"
|
||||
|
||||
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
|
||||
)
|
||||
|
||||
// EnsureNoNonImageFactoryMachines checks that no machines have an invalid schematic (provisioned without ImageFactory).
|
||||
func EnsureNoNonImageFactoryMachines(ctx context.Context, st state.State) error {
|
||||
statuses, err := safe.ReaderListAll[*omni.MachineStatus](ctx, st)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var count int
|
||||
|
||||
for status := range statuses.All() {
|
||||
if status.TypedSpec().Value.SchematicReady() && status.TypedSpec().Value.GetSchematic().GetInvalid() {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("detected %s provisioned without ImageFactory; "+
|
||||
"please re-provision them using ImageFactory",
|
||||
pluralize.NewClient().Pluralize("machine", count, true))
|
||||
}
|
||||
35
internal/pkg/config/validations/unsupported_talos_version.go
Normal file
35
internal/pkg/config/validations/unsupported_talos_version.go
Normal file
@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2026 Sidero Labs, Inc.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the LICENSE file.
|
||||
|
||||
package validations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/blang/semver/v4"
|
||||
"github.com/cosi-project/runtime/pkg/state"
|
||||
"github.com/gertd/go-pluralize"
|
||||
|
||||
"github.com/siderolabs/omni/client/pkg/constants"
|
||||
)
|
||||
|
||||
// EnsureNoMachinesBelowMinTalosVersion checks that no machines are running Talos versions below MinTalosVersion.
|
||||
func EnsureNoMachinesBelowMinTalosVersion(ctx context.Context, st state.State) error {
|
||||
minVer := semver.MustParse(constants.MinTalosVersion)
|
||||
|
||||
count, err := getMachinesBelowTalosVersion(ctx, st, minVer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("detected %s running unsupported Talos versions (below %s); "+
|
||||
"please upgrade the machines",
|
||||
pluralize.NewClient().Pluralize("machine", int(count), true), constants.MinTalosVersion)
|
||||
}
|
||||
153
internal/pkg/config/validations/validations_test.go
Normal file
153
internal/pkg/config/validations/validations_test.go
Normal file
@ -0,0 +1,153 @@
|
||||
// Copyright (c) 2026 Sidero Labs, Inc.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the LICENSE file.
|
||||
|
||||
package validations_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/cosi-project/runtime/pkg/state"
|
||||
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
|
||||
"github.com/cosi-project/runtime/pkg/state/impl/namespaced"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/siderolabs/omni/client/api/omni/specs"
|
||||
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
|
||||
"github.com/siderolabs/omni/internal/pkg/config/validations"
|
||||
)
|
||||
|
||||
func TestEnsureNoMachinesBelowMinTalosVersion(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
for _, tt := range []struct {
|
||||
versions map[string]string
|
||||
name string
|
||||
expectErr bool
|
||||
}{
|
||||
{
|
||||
name: "no machines",
|
||||
versions: nil,
|
||||
},
|
||||
{
|
||||
name: "machine below MinTalosVersion",
|
||||
versions: map[string]string{"m1": "v1.7.0"},
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "machine at MinTalosVersion",
|
||||
versions: map[string]string{"m1": "v1.8.0"},
|
||||
},
|
||||
{
|
||||
name: "machine above MinTalosVersion",
|
||||
versions: map[string]string{"m1": "v1.10.0"},
|
||||
},
|
||||
{
|
||||
name: "mix of below and above",
|
||||
versions: map[string]string{"m1": "v1.7.0", "m2": "v1.10.0"},
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "machine with empty version is ignored",
|
||||
versions: map[string]string{"m1": ""},
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
st := state.WrapCore(namespaced.NewState(inmem.Build))
|
||||
|
||||
for id, version := range tt.versions {
|
||||
ms := omni.NewMachineStatus(id)
|
||||
ms.TypedSpec().Value.TalosVersion = version
|
||||
|
||||
require.NoError(t, st.Create(ctx, ms))
|
||||
}
|
||||
|
||||
err := validations.EnsureNoMachinesBelowMinTalosVersion(ctx, st)
|
||||
if tt.expectErr {
|
||||
assert.ErrorContains(t, err, "running unsupported Talos versions")
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureNoNonImageFactoryMachines(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
for _, tt := range []struct {
|
||||
machines map[string]*specs.MachineStatusSpec_Schematic
|
||||
name string
|
||||
expectErr bool
|
||||
}{
|
||||
{
|
||||
name: "no machines",
|
||||
machines: nil,
|
||||
},
|
||||
{
|
||||
name: "valid schematic",
|
||||
machines: map[string]*specs.MachineStatusSpec_Schematic{
|
||||
"m1": {Id: "abc", FullId: "abc123"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "no schematic (not ready)",
|
||||
machines: map[string]*specs.MachineStatusSpec_Schematic{
|
||||
"m1": nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid schematic",
|
||||
machines: map[string]*specs.MachineStatusSpec_Schematic{
|
||||
"m1": {Invalid: true},
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "mix of valid and invalid",
|
||||
machines: map[string]*specs.MachineStatusSpec_Schematic{
|
||||
"m1": {Id: "abc", FullId: "abc123"},
|
||||
"m2": {Invalid: true},
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "agent mode is not counted",
|
||||
machines: map[string]*specs.MachineStatusSpec_Schematic{
|
||||
"m1": {InAgentMode: true, Invalid: true},
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
st := state.WrapCore(namespaced.NewState(inmem.Build))
|
||||
|
||||
for id, schematic := range tt.machines {
|
||||
ms := omni.NewMachineStatus(id)
|
||||
ms.TypedSpec().Value.Schematic = schematic
|
||||
|
||||
require.NoError(t, st.Create(ctx, ms))
|
||||
}
|
||||
|
||||
err := validations.EnsureNoNonImageFactoryMachines(ctx, st)
|
||||
if tt.expectErr {
|
||||
assert.ErrorContains(t, err, "provisioned without ImageFactory")
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user