Mirror of https://github.com/siderolabs/omni.git, synced 2025-08-06 17:46:59 +02:00.
We read the block devices in the machine status and check for a system disk to detect whether Talos is installed on the machine; based on this, we allow or disallow maintenance updates.

This value might be stale when the bare-metal infra provider is used: it powers machines off when they are idle, so their hardware information is not refreshed. The disk might be wiped, but the machine status poller might not catch the change in the block devices before the machine is powered off.

Instead, take the `installed` flag on the infra.MachineStatus resource as the source of truth for the machines managed by the bare-metal infra provider, and update the `installed` label based on it. Change the installation-detection logic in the frontend to use this label instead of checking block devices directly.

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
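The gist of the change, as a minimal sketch under stated assumptions (this is not the actual Omni controller or frontend code; the type names, the label key, and the helper function below are hypothetical): the `installed` flag reported by the bare-metal infra provider is projected onto a machine label, and installation detection reads that label instead of inspecting block devices.

// Hypothetical sketch of the approach described above, not the real Omni code:
// project the provider-reported "installed" flag onto a machine label so that
// consumers (e.g. the frontend) check the label instead of block devices.
package main

import "fmt"

// infraMachineStatus stands in for infra.MachineStatus as reported by the
// bare-metal infra provider; the Installed flag is the source of truth.
type infraMachineStatus struct {
	Installed bool
}

// machineStatus stands in for the Omni-side machine status resource whose
// labels are consumed for installation detection.
type machineStatus struct {
	Labels map[string]string
}

// labelInstalled is a hypothetical label key used only for this sketch.
const labelInstalled = "omni.sidero.dev/installed"

// reconcileInstalledLabel sets or clears the installed label based solely on
// the provider-reported flag, ignoring block device information entirely.
func reconcileInstalledLabel(infra infraMachineStatus, ms *machineStatus) {
	if infra.Installed {
		ms.Labels[labelInstalled] = ""
	} else {
		delete(ms.Labels, labelInstalled)
	}
}

func main() {
	ms := &machineStatus{Labels: map[string]string{}}

	// The provider reports Talos as installed, so the label is set even if the
	// machine is powered off and its block device info is stale.
	reconcileInstalledLabel(infraMachineStatus{Installed: true}, ms)

	_, installed := ms.Labels[labelInstalled]
	fmt.Println("installed:", installed) // installed: true
}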
182 lines · 5.3 KiB · Go
// Copyright (c) 2025 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

//go:build integration

package integration_test

import (
	"context"
	"encoding/json"
	"testing"
	"time"

	"github.com/cosi-project/runtime/pkg/resource"
	"github.com/cosi-project/runtime/pkg/resource/rtestutils"
	"github.com/cosi-project/runtime/pkg/safe"
	"github.com/cosi-project/runtime/pkg/state"
	talosclient "github.com/siderolabs/talos/pkg/machinery/client"
	clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
	"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/siderolabs/omni/client/pkg/client"
	"github.com/siderolabs/omni/client/pkg/omni/resources"
	"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
)

const annotationSnapshot = "snapshot"

type clusterSnapshot struct {
	Versions  map[string]string
	BootTimes map[string]time.Time
}

func (vs clusterSnapshot) saveVersion(res resource.Resource) {
	vs.Versions[res.Metadata().Type()+"/"+res.Metadata().ID()] = res.Metadata().Version().Next().String()
}

func (vs clusterSnapshot) getVersion(res resource.Resource) (string, bool) {
	val, ok := vs.Versions[res.Metadata().Type()+"/"+res.Metadata().ID()]

	return val, ok
}

// SaveClusterSnapshot saves the resource versions and machine boot times as an annotation on the given cluster.
func SaveClusterSnapshot(testCtx context.Context, client *client.Client, clusterName string) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Minute)
		defer cancel()

		st := client.Omni().State()

		snapshot := clusterSnapshot{
			Versions:  map[string]string{},
			BootTimes: map[string]time.Time{},
		}

		ids := rtestutils.ResourceIDs[*omni.RedactedClusterMachineConfig](ctx, t, st,
			state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterName)),
		)

		rtestutils.AssertResources(ctx, t, st, ids, func(res *omni.RedactedClusterMachineConfig, _ *assert.Assertions) {
			snapshot.saveVersion(res)
		})

		require := require.New(t)
		assert := assert.New(t)

		data, err := client.Management().Talosconfig(ctx)
		require.NoError(err)
		assert.NotEmpty(data)

		config, err := clientconfig.FromBytes(data)
		require.NoError(err)

		c, err := talosclient.New(ctx, talosclient.WithConfig(config), talosclient.WithCluster(clusterName))
		require.NoError(err)

		t.Cleanup(func() {
			require.NoError(c.Close())
		})

		machineIDs := rtestutils.ResourceIDs[*omni.ClusterMachine](ctx, t, client.Omni().State(),
			state.WithLabelQuery(
				resource.LabelEqual(omni.LabelCluster, clusterName),
				resource.LabelExists(omni.LabelControlPlaneRole),
			),
		)

		for _, machineID := range machineIDs {
			var ms *runtime.MachineStatus

			ms, err = safe.ReaderGetByID[*runtime.MachineStatus](talosclient.WithNode(ctx, machineID), c.COSI, runtime.MachineStatusID)

			require.NoError(err)

			snapshot.BootTimes[machineID] = ms.Metadata().Created()
		}

		snapshotData, err := json.Marshal(snapshot)

		require.NoError(err)

		_, err = safe.StateUpdateWithConflicts(ctx,
			client.Omni().State(),
			omni.NewCluster(resources.DefaultNamespace, clusterName).Metadata(),
			func(res *omni.Cluster) error {
				res.Metadata().Annotations().Set(annotationSnapshot, string(snapshotData))

				return nil
			},
		)

		require.NoError(err)
	}
}

// AssertClusterSnapshot reads the snapshot from the cluster resource and asserts that the resource versions did not change
// and that the saved boot times still match, i.e., the machines were not rebooted.
func AssertClusterSnapshot(testCtx context.Context, client *client.Client, clusterName string) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Minute)
		defer cancel()

		st := client.Omni().State()

		require := require.New(t)

		var snapshot clusterSnapshot

		cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, st, clusterName)
		require.NoError(err)

		snapshotData, ok := cluster.Metadata().Annotations().Get(annotationSnapshot)

		require.True(ok, "cluster does not have snapshot annotation")

		require.NoError(json.Unmarshal([]byte(snapshotData), &snapshot))

		ids := rtestutils.ResourceIDs[*omni.RedactedClusterMachineConfig](ctx, t, st,
			state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterName)),
		)

		rtestutils.AssertResources(ctx, t, st, ids, func(res *omni.RedactedClusterMachineConfig, assert *assert.Assertions) {
			version, ok := snapshot.getVersion(res)

			assert.True(ok)
			require.Equal(version, res.Metadata().Version().String())
		})

		data, err := client.Management().Talosconfig(ctx)
		require.NoError(err)
		assert.NotEmpty(t, data)

		config, err := clientconfig.FromBytes(data)
		require.NoError(err)

		c, err := talosclient.New(ctx, talosclient.WithConfig(config), talosclient.WithCluster(clusterName))
		require.NoError(err)

		t.Cleanup(func() {
			require.NoError(c.Close())
		})

		for machineID, bootTime := range snapshot.BootTimes {
			var ms *runtime.MachineStatus

			ms, err = safe.ReaderGetByID[*runtime.MachineStatus](talosclient.WithNode(ctx, machineID), c.COSI, runtime.MachineStatusID)

			require.NoError(err)

			require.True(ms.TypedSpec().Status.Ready)
			require.Equal(runtime.MachineStageRunning, ms.TypedSpec().Stage)

			require.Equal(bootTime, ms.Metadata().Created(), "the machine was rebooted")
		}
	}
}