omni/internal/integration/suites_test.go
Artem Chernyshev c9c4c8e10d
test: use go test to build and run Omni integration tests
All test modules were moved under the `integration` build tag and now
live in the `internal/integration` folder: there is no longer a
separate `cmd/integration-test` executable.

The new Kres version can build the same executable from the tests
directory instead.

All Omni-related flags were renamed, for example `--endpoint` ->
`--omni.endpoint`.
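
For illustration only (the endpoint value below is just a placeholder),
a run could look something like:

    go test -tags integration ./internal/integration/ -args --omni.endpoint=https://localhost:8099 --test.failfast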

Two more functional changes:

- Enabled `--test.failfast` for all test runs.
- Removed the finalizers which were run when a test failed.

Both of these changes should make it easier to understand a test
failure: Talos node logs won't be cluttered by the finalizer tearing
down the cluster.

Fixes: https://github.com/siderolabs/omni/issues/1171

Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
2025-06-03 15:07:00 +03:00

// Copyright (c) 2025 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

//go:build integration

package integration_test

import (
	"context"
	"net/http"
	"testing"
	"time"

	"github.com/siderolabs/omni/client/api/omni/specs"
	"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
	"github.com/siderolabs/omni/internal/pkg/clientconfig"
	"google.golang.org/protobuf/types/known/durationpb"
)
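
// assertClusterReadyOptions holds the Talos and Kubernetes versions expected by assertClusterAndAPIReady.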
type assertClusterReadyOptions struct {
talosVersion string
kubernetesVersion string
}
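
// assertClusterReadyOption is a functional option for assertClusterAndAPIReady.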
type assertClusterReadyOption func(*assertClusterReadyOptions)
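
// withTalosVersion overrides the expected Talos version.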
func withTalosVersion(version string) assertClusterReadyOption {
return func(acro *assertClusterReadyOptions) {
acro.talosVersion = version
}
}
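
// withKubernetesVersion overrides the expected Kubernetes version.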
func withKubernetesVersion(version string) assertClusterReadyOption {
return func(acro *assertClusterReadyOptions) {
acro.kubernetesVersion = version
}
}
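
// assertClusterAndAPIReady asserts that the cluster, its Talos API and Kubernetes are ready,
// using the versions from the machine options unless overridden via opts.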
func assertClusterAndAPIReady(t *testing.T, clusterName string, options *TestOptions, opts ...assertClusterReadyOption) {
optionsStruct := assertClusterReadyOptions{
talosVersion: options.MachineOptions.TalosVersion,
kubernetesVersion: options.MachineOptions.KubernetesVersion,
}
for _, o := range opts {
o(&optionsStruct)
}
runTests(t, AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
t.Context(),
options.omniClient,
clusterName,
optionsStruct.talosVersion,
optionsStruct.kubernetesVersion,
options.talosAPIKeyPrepare,
))
}
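
// testCleanState brings Omni to a clean state and asserts that all expected machines are connected
// and in maintenance mode.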
func testCleanState(options *TestOptions) TestFunc {
return func(t *testing.T) {
ctx := t.Context()
t.Log(`
Bring Omni to a clean state by removing all clusters, config patches, etc. that might have been left over from previous runs.
Wait for all expected machines to join and be in maintenance mode.`)
t.Run(
"DestroyAllClusterRelatedResources",
DestroyAllClusterRelatedResources(ctx, options.omniClient.Omni().State()),
)
// machine discovery, all machines should be in maintenance mode
t.Run(
"LinkCountShouldMatchExpectedMachines",
AssertNumberOfLinks(ctx, options.omniClient.Omni().State(), expectedMachines),
)
t.Run(
"LinksShouldBeConnected",
AssertLinksConnected(ctx, options.omniClient.Omni().State()),
)
t.Run(
"LinksShouldMatchMachines",
AssertMachinesMatchLinks(ctx, options.omniClient.Omni().State()),
)
t.Run(
"MachinesShouldHaveLogs",
AssertMachinesHaveLogs(ctx, options.omniClient.Omni().State(), options.omniClient.Management()),
)
t.Run(
"MachinesShouldBeReachableInMaintenanceMode",
AssertTalosMaintenanceAPIAccessViaOmni(ctx, options.omniClient, options.talosAPIKeyPrepare),
)
t.Run(
"MachinesShouldBeInMaintenanceMode",
AssertMachineStatus(ctx, options.omniClient.Omni().State(), true, "", map[string]string{
omni.MachineStatusLabelConnected: "",
omni.MachineStatusLabelReportingEvents: "",
omni.MachineStatusLabelAvailable: "",
// QEMU-specific labels which should always match, others are specific to the settings (number of cores, etc.)
omni.MachineStatusLabelCPU: "qemu",
omni.MachineStatusLabelArch: "amd64",
omni.MachineStatusLabelPlatform: "metal",
}, nil),
)
}
}
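
// testImageGeneration tests that Talos images generated by Omni can be downloaded.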
func testImageGeneration(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Parallel()
t.Log(`
Generate various Talos images with Omni and try to download them.`)
t.Run(
"TalosImagesShouldBeDownloadableUsingCLI",
AssertDownloadUsingCLI(t.Context(), options.omniClient, options.OmnictlPath, options.HTTPEndpoint),
)
t.Run(
"TalosImagesShouldBeDownloadable",
AssertSomeImagesAreDownloadable(t.Context(), options.omniClient, func(ctx context.Context, req *http.Request) error {
return clientconfig.SignHTTPRequest(ctx, options.omniClient, req)
}, options.HTTPEndpoint),
)
}
}
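
// testCLICommands tests various omnictl commands.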
func testCLICommands(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Parallel()
t.Log(`
Verify various omnictl commands.`)
t.Run(
"OmnictlUserCLIShouldWork",
AssertUserCLI(t.Context(), options.omniClient, options.OmnictlPath, options.HTTPEndpoint),
)
}
}
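
// testKubernetesNodeAudit tests the auditing of Kubernetes nodes which are gone from the Omni
// perspective but still exist in the Kubernetes cluster.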
func testKubernetesNodeAudit(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Parallel()
clusterName := "integration-k8s-node-audit"
options.claimMachines(t, 2)
t.Log(`
Test the auditing of the Kubernetes nodes, i.e. when a node is gone from the Omni perspective but still exists on the Kubernetes cluster.`)
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
runTests(
t,
AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
t.Context(),
options.omniClient,
clusterName,
options.MachineOptions.TalosVersion,
options.MachineOptions.KubernetesVersion,
options.talosAPIKeyPrepare,
),
)
t.Run(
"KubernetesNodeAuditShouldBePerformed",
AssertKubernetesNodeAudit(
t.Context(),
clusterName,
options,
),
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
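
// testForcedMachineRemoval tests forced machine removal scenarios, simulating hardware failures.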
func testForcedMachineRemoval(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests different scenarios for forced Machine removal (vs. graceful removal from a cluster):
- force remove a Machine which is not allocated (not part of any cluster)
- force remove a worker Machine which is part of the cluster
- force remove a control plane Machine which is part of the cluster, and replace with a new Machine.
These tests simulate a hardware failure of a Machine which requires a forced removal from Omni.
In the tests, we wipe and reboot the VMs to bring them back as available for the next test.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-forced-removal"
assertClusterReady := func() {
runTests(t, AssertBlockClusterShouldBeReady(
t.Context(),
options.omniClient,
clusterName,
options.MachineOptions.TalosVersion,
options.talosAPIKeyPrepare,
))
}
t.Run(
"UnallocatedMachinesShouldBeDestroyable",
AssertUnallocatedMachineDestroyFlow(t.Context(), options.omniClient.Omni().State(), options.RestartAMachineFunc),
)
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterReady()
t.Run(
"WorkerNodesShouldBeForceRemovable",
AssertForceRemoveWorkerNode(t.Context(), options.omniClient.Omni().State(), clusterName, options.FreezeAMachineFunc, options.WipeAMachineFunc),
)
assertClusterReady()
t.Run(
"ControlPlaneNodeShouldBeForceReplaceable",
AssertControlPlaneForceReplaceMachine(
t.Context(),
options.omniClient.Omni().State(),
clusterName,
options.Options,
),
)
assertClusterReady()
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
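
// testImmediateClusterDestruction tests destroying a cluster immediately after creation.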
func testImmediateClusterDestruction(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Regression test: create a cluster and destroy it without waiting for the cluster to reach any state.`)
t.Parallel()
options.claimMachines(t, 3)
clusterName := "integration-immediate"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 2,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
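
// testDefaultCluster tests a regular 3 + 2 cluster with an HA control plane.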
func testDefaultCluster(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Create a regular 3 + 2 cluster with an HA control plane, and assert that the cluster is ready and accessible.
Don't do any changes to the cluster.`)
t.Parallel()
clusterOptions := ClusterOptions{
ControlPlanes: 3,
Workers: 2,
MachineOptions: options.MachineOptions,
}
options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers)
runTests(t, AssertClusterCreateAndReady(
t.Context(),
options.omniClient,
options.talosAPIKeyPrepare,
"default",
clusterOptions,
options.OutputDir,
))
}
}
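
// testEncryptedCluster tests a 1 + 1 cluster with disk encryption using Omni as a KMS.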
func testEncryptedCluster(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Create a 1 + 1 cluster and enable disk encryption via Omni as a KMS.
Don't do any changes to the cluster.`)
t.Parallel()
clusterOptions := ClusterOptions{
ControlPlanes: 1,
Workers: 1,
MachineOptions: options.MachineOptions,
Features: &specs.ClusterSpec_Features{
DiskEncryption: true,
},
}
options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers)
runTests(t, AssertClusterCreateAndReady(
t.Context(),
options.omniClient,
options.talosAPIKeyPrepare,
"encrypted",
clusterOptions,
options.OutputDir,
))
}
}
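
// testSinglenodeCluster tests a single-node cluster.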
func testSinglenodeCluster(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Create a single node cluster.
Don't do any changes to the cluster.`)
t.Parallel()
clusterOptions := ClusterOptions{
ControlPlanes: 1,
Workers: 0,
MachineOptions: options.MachineOptions,
}
options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers)
runTests(t, AssertClusterCreateAndReady(
t.Context(),
options.omniClient,
options.talosAPIKeyPrepare,
"singlenode",
clusterOptions,
options.OutputDir,
))
}
}
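
// testScaleUpAndDown tests scaling a cluster up and down.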
func testScaleUpAndDown(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests scaling up and down a cluster:
- create a 1+0 cluster
- scale up to 1+1
- scale up to 3+1
- scale down to 3+0
- scale down to 1+0
In between the scaling operations, assert that the cluster is ready and accessible.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-scaling"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeAdded",
ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeAdded",
ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 2,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeRemoved",
ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: -1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeRemoved",
ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: -2,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
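
// testScaleUpAndDownMachineClassBasedMachineSets tests scaling a cluster up and down using machine classes.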
func testScaleUpAndDownMachineClassBasedMachineSets(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests scaling up and down a cluster using machine classes:
- create a 1+0 cluster
- scale up to 1+1
- scale up to 3+1
- scale down to 3+0
- scale down to 1+0
In between the scaling operations, assert that the cluster is ready and accessible.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-scaling-machine-class-based-machine-sets"
t.Run(
"ClusterShouldBeCreated",
CreateClusterWithMachineClass(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeAdded",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeAdded",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 2,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeRemoved",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: -1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeRemoved",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: -2,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
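
// testScaleUpAndDownAutoProvisionMachineSets tests scaling a cluster up and down using an infrastructure provisioner.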
func testScaleUpAndDownAutoProvisionMachineSets(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests scaling up and down a cluster using infrastructure provisioner:
- create a 1+0 cluster
- scale up to 1+1
- scale up to 3+1
- scale down to 3+0
- scale down to 1+0
In between the scaling operations, assert that the cluster is ready and accessible.`)
t.Parallel()
clusterName := "integration-scaling-auto-provision"
t.Run(
"ClusterShouldBeCreated",
CreateClusterWithMachineClass(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
InfraProvider: options.defaultInfraProvider(),
MachineOptions: options.MachineOptions,
ProviderData: options.defaultProviderData(),
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeAdded",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: 1,
InfraProvider: options.defaultInfraProvider(),
MachineOptions: options.MachineOptions,
ProviderData: options.defaultProviderData(),
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeAdded",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 2,
Workers: 0,
MachineOptions: options.MachineOptions,
InfraProvider: options.defaultInfraProvider(),
ProviderData: options.defaultProviderData(),
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"OneWorkerShouldBeRemoved",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 0,
Workers: -1,
InfraProvider: options.defaultInfraProvider(),
MachineOptions: options.MachineOptions,
ProviderData: options.defaultProviderData(),
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"TwoControlPlanesShouldBeRemoved",
ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: -2,
Workers: 0,
InfraProvider: options.defaultInfraProvider(),
MachineOptions: options.MachineOptions,
ProviderData: options.defaultProviderData(),
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, true, false),
)
}
}
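
// testRollingUpdateParallelism tests the maxParallelism setting for rolling updates and scale-downs of worker machine sets.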
func testRollingUpdateParallelism(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests rolling update & scale-down strategies for controlling concurrency on worker machine sets.
- create a 1+3 cluster
- update the worker configs with rolling strategy using maxParallelism of 2
- scale down the workers to 0 with rolling strategy using maxParallelism of 2
- assert that the maxParallelism of 2 was respected in both operations.`)
t.Parallel()
clusterName := "integration-rolling-update-parallelism"
options.claimMachines(t, 4)
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 3,
MachineOptions: options.MachineOptions,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"WorkersUpdateShouldBeRolledOutWithMaxParallelism",
AssertWorkerNodesRollingConfigUpdate(t.Context(), options.omniClient, clusterName, 2),
)
t.Run(
"WorkersShouldScaleDownWithMaxParallelism",
AssertWorkerNodesRollingScaleDown(t.Context(), options.omniClient, clusterName, 2),
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
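
// testReplaceControlPlanes tests replacing control plane nodes.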
func testReplaceControlPlanes(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests replacing control plane nodes:
- create a 1+0 cluster
- scale up to 2+0, and immediately remove the first control plane node
In between the scaling operations, assert that the cluster is ready and accessible.`)
t.Parallel()
options.claimMachines(t, 2)
clusterName := "integration-replace-cp"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ControlPlanesShouldBeReplaced",
ReplaceControlPlanes(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
MachineOptions: options.MachineOptions,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
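
// testConfigPatching tests applying config patches, including invalid patches which should not be applied.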
func testConfigPatching(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests applying various config patches, including "broken" ones which should not be applied.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-config-patching"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"LargeImmediateConfigPatchShouldBeAppliedAndRemoved",
AssertLargeImmediateConfigApplied(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"MachineSetConfigPatchShouldBeAppliedAndRemoved",
AssertConfigPatchMachineSet(t.Context(), options.omniClient, clusterName),
)
t.Run(
"SingleClusterMachineConfigPatchShouldBeAppliedAndRemoved",
AssertConfigPatchSingleClusterMachine(t.Context(), options.omniClient, clusterName),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ConfigPatchWithRebootShouldBeApplied",
AssertConfigPatchWithReboot(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"InvalidConfigPatchShouldNotBeApplied",
AssertConfigPatchWithInvalidConfig(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
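
// testTalosUpgrades tests Talos upgrades, including reverting and canceling an upgrade.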
func testTalosUpgrades(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests upgrading Talos version, including reverting a failed upgrade.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-talos-upgrade"
machineOptions := MachineOptions{
TalosVersion: options.AnotherTalosVersion,
KubernetesVersion: options.AnotherKubernetesVersion, // use older Kubernetes compatible with AnotherTalosVersion
}
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
MachineOptions: machineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options, withTalosVersion(machineOptions.TalosVersion), withKubernetesVersion(machineOptions.KubernetesVersion))
if !options.SkipExtensionsCheckOnCreate {
t.Run(
"HelloWorldServiceExtensionShouldBePresent",
AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, HelloWorldServiceExtensionName),
)
}
t.Run(
"TalosSchematicUpdateShouldSucceed",
AssertTalosSchematicUpdateFlow(t.Context(), options.omniClient, clusterName),
)
t.Run(
"QemuGuestAgentExtensionShouldBePresent",
AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, QemuGuestAgentExtensionName),
)
t.Run(
"ClusterBootstrapManifestSyncShouldBeSuccessful",
KubernetesBootstrapManifestSync(t.Context(), options.omniClient.Management(), clusterName),
)
t.Run(
"TalosUpgradeShouldSucceed",
AssertTalosUpgradeFlow(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion),
)
t.Run(
"ClusterBootstrapManifestSyncShouldBeSuccessful",
KubernetesBootstrapManifestSync(t.Context(), options.omniClient.Management(), clusterName),
)
if !options.SkipExtensionsCheckOnCreate {
t.Run(
"HelloWorldServiceExtensionShouldBePresent",
AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, HelloWorldServiceExtensionName),
)
}
assertClusterAndAPIReady(t, clusterName, options, withTalosVersion(options.MachineOptions.TalosVersion), withKubernetesVersion(machineOptions.KubernetesVersion))
t.Run(
"FailedTalosUpgradeShouldBeRevertible",
AssertTalosUpgradeIsRevertible(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion),
)
t.Run(
"RunningTalosUpgradeShouldBeCancelable",
AssertTalosUpgradeIsCancelable(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion, options.AnotherTalosVersion),
)
assertClusterAndAPIReady(t, clusterName, options, withKubernetesVersion(machineOptions.KubernetesVersion))
t.Run(
"MaintenanceTestConfigShouldStillBePresent",
AssertMaintenanceTestConfigIsPresent(t.Context(), options.omniClient.Omni().State(), clusterName, 0), // check the maintenance config in the first machine
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
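
// testKubernetesUpgrades tests Kubernetes upgrades, including reverting a failed upgrade.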
func testKubernetesUpgrades(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests upgrading Kubernetes version, including reverting a failed upgrade.`)
t.Parallel()
options.claimMachines(t, 4)
clusterName := "integration-k8s-upgrade"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
MachineOptions: MachineOptions{
TalosVersion: options.MachineOptions.TalosVersion,
KubernetesVersion: options.AnotherKubernetesVersion,
},
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options, withKubernetesVersion(options.AnotherKubernetesVersion))
t.Run(
"KubernetesUpgradeShouldSucceed",
AssertKubernetesUpgradeFlow(
t.Context(), options.omniClient.Omni().State(), options.omniClient.Management(),
clusterName,
options.MachineOptions.KubernetesVersion,
),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"FailedKubernetesUpgradeShouldBeRevertible",
AssertKubernetesUpgradeIsRevertible(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.KubernetesVersion),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
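
// testEtcdBackupAndRestore tests automatic and manual etcd backups and restoring clusters from them.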
func testEtcdBackupAndRestore(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests automatic & manual backup & restore for workload etcd.
Automatic backups are enabled and taken, then a manual backup is created.
Afterwards, the cluster's control plane is destroyed and then recovered from the backup.
Finally, a completely new cluster is created using the same backup to test the "point-in-time recovery".`)
t.Parallel()
options.claimMachines(t, 6)
clusterName := "integration-etcd-backup"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
EtcdBackup: &specs.EtcdBackupConf{
Interval: durationpb.New(2 * time.Hour),
Enabled: true,
},
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
runTests(t,
AssertBlockKubernetesDeploymentCreateAndRunning(t.Context(), options.omniClient.Management(),
clusterName,
"default",
"test",
),
)
t.Run(
"KubernetesSecretShouldBeCreated",
AssertKubernetesSecretIsCreated(t.Context(), options.omniClient.Management(),
clusterName, "default", "test", "backup-test-secret-val"),
)
t.Run(
"EtcdAutomaticBackupShouldBeCreated",
AssertEtcdAutomaticBackupIsCreated(t.Context(), options.omniClient.Omni().State(), clusterName),
)
t.Run(
"EtcdManualBackupShouldBeCreated",
AssertEtcdManualBackupIsCreated(t.Context(), options.omniClient.Omni().State(), clusterName),
)
secondClusterName := "integration-etcd-backup-new-cluster"
runTests(
t,
AssertBlockCreateClusterFromEtcdBackup(t.Context(), options.omniClient, options.talosAPIKeyPrepare, options.Options,
clusterName,
secondClusterName,
"default",
"test",
),
)
t.Run(
"EtcdSecretShouldBeSameAfterCreateFromBackup",
AssertKubernetesSecretHasValue(t.Context(), options.omniClient.Management(), secondClusterName, "default", "test", "backup-test-secret-val"),
)
t.Run(
"NewClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), secondClusterName, false, false),
)
runTests(
t,
AssertBlockRestoreEtcdFromLatestBackup(t.Context(), options.omniClient, options.talosAPIKeyPrepare, options.Options,
3,
clusterName,
"default",
"test",
),
)
t.Run(
"RestoredClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
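
// testMaintenanceUpgrade tests upgrading a machine while it is in maintenance mode.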
func testMaintenanceUpgrade(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Test upgrading (downgrading) a machine in maintenance mode.
Create a cluster out of a single machine on version1, then remove the cluster (the machine stays on version1 with Talos installed).
Create a cluster out of the same machine on version2; Omni should upgrade the machine to version2 while it is in maintenance mode.`)
t.Parallel()
options.claimMachines(t, 1)
t.Run(
"MachineShouldBeUpgradedInMaintenanceMode",
AssertMachineShouldBeUpgradedInMaintenanceMode(
t.Context(), options.omniClient,
"integration-maintenance-upgrade",
options.AnotherKubernetesVersion,
options.MachineOptions.TalosVersion,
options.AnotherTalosVersion,
options.talosAPIKeyPrepare,
),
)
}
}
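
// testAuth tests authentication and authorization on the Omni API.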
func testAuth(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Test authorization when accessing the Omni API; some tests run without a cluster, while others run in the context of a cluster.`)
t.Parallel()
options.claimMachines(t, 1)
t.Run(
"AnonymousRequestShouldBeDenied",
AssertAnonymousAuthenication(t.Context(), options.omniClient),
)
t.Run(
"InvalidSignatureShouldBeDenied",
AssertAPIInvalidSignature(t.Context(), options.omniClient),
)
t.Run(
"PublicKeyWithoutLifetimeShouldNotBeRegistered",
AssertPublicKeyWithoutLifetimeNotRegistered(t.Context(), options.omniClient),
)
t.Run(
"PublicKeyWithLongLifetimeShouldNotBeRegistered",
AssertPublicKeyWithLongLifetimeNotRegistered(t.Context(), options.omniClient),
)
t.Run(
"OmniconfigShouldBeDownloadable",
AssertOmniconfigDownload(t.Context(), options.omniClient),
)
t.Run(
"PublicKeyWithUnknownEmailShouldNotBeRegistered",
AssertRegisterPublicKeyWithUnknownEmail(t.Context(), options.omniClient),
)
t.Run(
"ServiceAccountAPIShouldWork",
AssertServiceAccountAPIFlow(t.Context(), options.omniClient),
)
t.Run(
"ResourceAuthzShouldWork",
AssertResourceAuthz(t.Context(), options.omniClient, options.clientConfig),
)
t.Run(
"ResourceAuthzWithACLShouldWork",
AssertResourceAuthzWithACL(t.Context(), options.omniClient, options.clientConfig),
)
clusterName := "integration-auth"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
Features: &specs.ClusterSpec_Features{
UseEmbeddedDiscoveryService: true,
},
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"APIAuthorizationShouldBeTested",
AssertAPIAuthz(t.Context(), options.omniClient, options.clientConfig, clusterName),
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
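
// testClusterTemplate tests cluster creation and scaling using cluster templates.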
func testClusterTemplate(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Test flow of cluster creation and scaling using cluster templates.`)
t.Parallel()
options.claimMachines(t, 5)
t.Run(
"TestClusterTemplateFlow",
AssertClusterTemplateFlow(t.Context(), options.omniClient.Omni().State(), options.MachineOptions),
)
}
}
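
// testWorkloadProxy tests the workload service proxying feature.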
func testWorkloadProxy(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Test the workload service proxying feature.`)
t.Parallel()
options.claimMachines(t, 1)
clusterName := "integration-workload-proxy"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
Features: &specs.ClusterSpec_Features{
EnableWorkloadProxy: true,
},
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"WorkloadProxyShouldBeTested",
AssertWorkloadProxy(t.Context(), options.omniClient, clusterName),
)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false),
)
}
}
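
// testStaticInfraProvider tests common Omni operations on machines created by a static infrastructure provider.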
func testStaticInfraProvider(options *TestOptions) TestFunc {
return func(t *testing.T) {
t.Log(`
Tests common Omni operations on machines created by a static infrastructure provider.
Note: this test expects all machines to be provisioned by the bare-metal infra provider, as it doesn't filter them.
- create a 1+0 cluster - assert that the cluster is healthy and ready
- scale it up to 3+1 - assert that the cluster is healthy and ready
- assert that the machines are not ready to use (occupied)
- scale it down to 1+0 - assert that the cluster is healthy and ready
- destroy the cluster - assert that the machines are wiped, then marked as ready to use
- create a new 3+1 cluster
- assert that the cluster is healthy and ready
- remove the links of the machines
`)
t.Parallel()
clusterName := "integration-static-infra-provider"
t.Run(
"ClusterShouldBeCreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 1,
Workers: 0,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: true,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeScaledUp",
ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: 2,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ExtensionsShouldBeUpdated",
UpdateExtensions(t.Context(), options.omniClient, clusterName, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
)
t.Run(
"MachinesShouldBeAllocated",
AssertInfraMachinesAreAllocated(t.Context(), options.omniClient.Omni().State(), clusterName,
options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
)
t.Run(
"ClusterShouldBeScaledDown",
ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{
Name: clusterName,
ControlPlanes: -2,
Workers: -1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, true),
)
t.Run(
"ClusterShouldBeRecreated",
CreateCluster(t.Context(), options.omniClient, ClusterOptions{
Name: clusterName,
ControlPlanes: 3,
Workers: 1,
MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,
SkipExtensionCheckOnCreate: true,
}),
)
assertClusterAndAPIReady(t, clusterName, options)
t.Run(
"ClusterShouldBeDestroyed",
AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, true),
)
}
}