Mirror of https://github.com/siderolabs/omni.git, synced 2025-08-06 01:27:02 +02:00
All test modules were moved under the `integration` build tag and now live in the `internal/integration` folder: there is no longer a `cmd/integration-test` executable. The new Kres version builds the same executable from the tests directory instead. All Omni-related flags were renamed, for example `--endpoint` -> `--omni.endpoint`.

Two more functional changes:
- Enabled `--test.failfast` for all test runs.
- Removed the finalizers that ran when a test failed.

Both changes should make test failures easier to understand: Talos node logs are no longer cluttered by the finalizer tearing down the cluster.

Fixes: https://github.com/siderolabs/omni/issues/1171

Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
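As a rough illustration of the new layout: Kres drives the actual build, but a manual equivalent with the plain Go toolchain might look like the sketch below. Only `--omni.endpoint` and `--test.failfast` are taken from the description above; the output binary name and the endpoint URL are placeholders.

    # build the test executable from the tests directory (replaces cmd/integration-test)
    go test -c -tags integration -o integration-test ./internal/integration

    # run it against an Omni instance; the endpoint value here is hypothetical
    ./integration-test --omni.endpoint https://omni.example.org --test.failfast --test.v

With `--test.failfast` the binary stops at the first failing test, which keeps the Talos node logs close to the point of failure.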
194 lines · 6.7 KiB · Go
// Copyright (c) 2025 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

//go:build integration

package integration_test

import (
	"bytes"
	"context"
	_ "embed"
	"os"
	"testing"
	"text/template"
	"time"

	"github.com/cosi-project/runtime/pkg/resource"
	"github.com/cosi-project/runtime/pkg/resource/rtestutils"
	"github.com/cosi-project/runtime/pkg/state"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/siderolabs/omni/client/api/omni/specs"
	"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
	"github.com/siderolabs/omni/client/pkg/template/operations"
)

//go:embed testdata/cluster-1.tmpl.yaml
var cluster1Tmpl []byte

//go:embed testdata/cluster-2.tmpl.yaml
var cluster2Tmpl []byte

type tmplOptions struct {
	KubernetesVersion string
	TalosVersion      string

	CP []string
	W  []string
}

func renderTemplate(t *testing.T, tmpl []byte, opts tmplOptions) []byte {
	var b bytes.Buffer

	require.NoError(t, template.Must(template.New("cluster").Parse(string(tmpl))).Execute(&b, opts))

	return b.Bytes()
}

// AssertClusterTemplateFlow verifies cluster template operations.
func AssertClusterTemplateFlow(testCtx context.Context, st state.State, options MachineOptions) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, 20*time.Minute)
		defer cancel()

		const (
			clusterName           = "tmpl-cluster"
			additionalWorkersName = "additional-workers"
		)

		require := require.New(t)

		var (
			machineIDs []resource.ID
			opts       tmplOptions
			tmpl1      []byte
		)

		pickUnallocatedMachines(ctx, t, st, 5, func(mIDs []resource.ID) {
			machineIDs = mIDs

			opts = tmplOptions{
				KubernetesVersion: "v" + options.KubernetesVersion,
				TalosVersion:      "v" + options.TalosVersion,

				CP: machineIDs[:3],
				W:  machineIDs[3:],
			}

			tmpl1 = renderTemplate(t, cluster1Tmpl, opts)

			require.NoError(operations.ValidateTemplate(bytes.NewReader(tmpl1)))

			t.Log("creating template cluster")

			require.NoError(operations.SyncTemplate(ctx, bytes.NewReader(tmpl1), os.Stderr, st, operations.SyncOptions{
				Verbose: true,
			}))

			// assert that machines got allocated (label available is removed)
			rtestutils.AssertResources(ctx, t, st, machineIDs, func(machineStatus *omni.MachineStatus, assert *assert.Assertions) {
				assert.True(machineStatus.Metadata().Labels().Matches(
					resource.LabelTerm{
						Key:    omni.MachineStatusLabelAvailable,
						Op:     resource.LabelOpExists,
						Invert: true,
					},
				), resourceDetails(machineStatus))
			})
		})

		t.Log("wait for cluster to be ready")

		// wait using the status command
		require.NoError(operations.StatusTemplate(ctx, bytes.NewReader(tmpl1), os.Stderr, st, operations.StatusOptions{
			Wait: true,
		}))

		// re-check with short timeout to make sure the cluster is ready
		checkCtx, checkCancel := context.WithTimeout(ctx, 30*time.Second)
		defer checkCancel()

		rtestutils.AssertResources(checkCtx, t, st, []string{clusterName}, func(status *omni.ClusterStatus, assert *assert.Assertions) {
			spec := status.TypedSpec().Value

			assert.Truef(spec.Available, "not available: %s", resourceDetails(status))
			assert.Equalf(specs.ClusterStatusSpec_RUNNING, spec.Phase, "cluster is not in phase running: %s", resourceDetails(status))
			assert.Equalf(spec.GetMachines().Total, spec.GetMachines().Healthy, "not all machines are healthy: %s", resourceDetails(status))
			assert.Truef(spec.Ready, "cluster is not ready: %s", resourceDetails(status))
			assert.Truef(spec.ControlplaneReady, "cluster controlplane is not ready: %s", resourceDetails(status))
			assert.Truef(spec.KubernetesAPIReady, "cluster kubernetes API is not ready: %s", resourceDetails(status))
			assert.EqualValuesf(len(opts.CP)+len(opts.W), spec.GetMachines().Total, "total machines is not the same as in the machine sets: %s", resourceDetails(status))
		})

		rtestutils.AssertResources(checkCtx, t, st, []string{
			omni.ControlPlanesResourceID(clusterName),
			omni.WorkersResourceID(clusterName),
			omni.AdditionalWorkersResourceID(clusterName, additionalWorkersName),
		}, func(*omni.MachineSet, *assert.Assertions) {})

		t.Log("updating template cluster")

		opts.CP = opts.CP[:1]

		tmpl2 := renderTemplate(t, cluster2Tmpl, opts)

		require.NoError(operations.SyncTemplate(ctx, bytes.NewReader(tmpl2), os.Stderr, st, operations.SyncOptions{
			Verbose: true,
		}))

		t.Log("waiting for cluster operations to apply")

		time.Sleep(10 * time.Second)

		t.Log("wait for cluster to be ready")

		// wait using the status command
		require.NoError(operations.StatusTemplate(ctx, bytes.NewReader(tmpl2), os.Stderr, st, operations.StatusOptions{
			Wait: true,
		}))

		// re-check with short timeout to make sure the cluster is ready
		checkCtx, checkCancel = context.WithTimeout(ctx, 10*time.Second)
		defer checkCancel()

		rtestutils.AssertResources(checkCtx, t, st, []string{clusterName}, func(status *omni.ClusterStatus, assert *assert.Assertions) {
			spec := status.TypedSpec().Value

			assert.Truef(spec.Available, "not available: %s", resourceDetails(status))
			assert.Equalf(specs.ClusterStatusSpec_RUNNING, spec.Phase, "cluster is not in phase running: %s", resourceDetails(status))
			assert.Equalf(spec.GetMachines().Total, spec.GetMachines().Healthy, "not all machines are healthy: %s", resourceDetails(status))
			assert.Truef(spec.Ready, "cluster is not ready: %s", resourceDetails(status))
			assert.Truef(spec.ControlplaneReady, "cluster controlplane is not ready: %s", resourceDetails(status))
			assert.Truef(spec.KubernetesAPIReady, "cluster kubernetes API is not ready: %s", resourceDetails(status))
			assert.EqualValuesf(len(opts.CP)+len(opts.W), spec.GetMachines().Total, "total machines is not the same as in the machine sets: %s", resourceDetails(status))
		})

		require.NoError(operations.ValidateTemplate(bytes.NewReader(tmpl1)))

		t.Log("deleting template cluster")

		require.NoError(operations.DeleteTemplate(ctx, bytes.NewReader(tmpl1), os.Stderr, st, operations.SyncOptions{
			Verbose: true,
		}))

		rtestutils.AssertNoResource[*omni.Cluster](ctx, t, st, clusterName)

		// make sure machines are returned to the pool or allocated into another cluster
		rtestutils.AssertResources(ctx, t, st, machineIDs, func(machineStatus *omni.MachineStatus, assert *assert.Assertions) {
			assert.True(machineStatus.Metadata().Labels().Matches(resource.LabelTerm{
				Key: omni.MachineStatusLabelAvailable,
				Op:  resource.LabelOpExists,
			}) || machineStatus.Metadata().Labels().Matches(resource.LabelTerm{
				Key:    omni.LabelCluster,
				Op:     resource.LabelOpEqual,
				Value:  []string{clusterName},
				Invert: true,
			}), resourceDetails(machineStatus))
		})
	}
}