From c9c4c8e10db05e177d139ff07e5b98fac96581bc Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Mon, 2 Jun 2025 22:04:12 +0300 Subject: [PATCH] test: use `go test` to build and run Omni integration tests All test modules were moved under the `integration` build tag and now live in the `internal/integration` folder: there is no longer a `cmd/integration-test` executable. The new Kres version builds the same executable from the tests directory instead. All Omni-related flags were renamed, for example `--endpoint` -> `--omni.endpoint`. Two more functional changes: - Enabled `--test.failfast` for all test runs. - Removed the finalizers that ran when a test failed. Both of these changes should make it easier to understand a test failure: Talos node logs won't be cluttered by the finalizer tearing down the cluster. Fixes: https://github.com/siderolabs/omni/issues/1171 Signed-off-by: Artem Chernyshev --- .dockerignore | 3 +- .github/workflows/ci.yaml | 97 +- .github/workflows/e2e-backups-cron.yaml | 6 +- .../workflows/e2e-forced-removal-cron.yaml | 6 +- .github/workflows/e2e-scaling-cron.yaml | 6 +- .github/workflows/e2e-short-cron.yaml | 6 +- .../workflows/e2e-short-secureboot-cron.yaml | 6 +- .github/workflows/e2e-templates-cron.yaml | 6 +- .github/workflows/e2e-upgrades-cron.yaml | 6 +- .../workflows/e2e-workload-proxy-cron.yaml | 6 +- .github/workflows/helm.yaml | 2 +- .github/workflows/slack-notify.yaml | 2 +- .golangci.yml | 2 +- .kres.yaml | 63 +- Dockerfile | 68 +- Makefile | 78 +- SECURITY.md | 10 +- client/.golangci.yml | 2 +- cmd/integration-test/main.go | 20 - cmd/integration-test/pkg/root.go | 232 --- cmd/integration-test/pkg/tests/infra.go | 365 ---- cmd/integration-test/pkg/tests/stats.go | 143 -- cmd/integration-test/pkg/tests/tests.go | 1740 ----------------- cmd/integration-test/pkg/tests/utils.go | 61 - .../make-cookies/main.go | 2 +- frontend/package.json | 2 +- hack/test/integration.sh | 30 +- .../integration/auth_test.go | 6 +- .../integration/backup_test.go | 4 +- .../integration/blocks_test.go | 90 +- .../integration/cleanup_test.go | 4 +- .../integration/cli_test.go | 4 +- .../integration/cluster_test.go | 10 +- .../integration/common_test.go | 137 +- .../integration/config_patch_test.go | 4 +- .../integration/extensions_test.go | 4 +- .../integration/image_test.go | 4 +- internal/integration/infra_test.go | 359 ++++ internal/integration/integration_test.go | 348 ++++ .../integration/kubernetes_node_audit_test.go | 11 +- .../integration/kubernetes_test.go | 4 +- .../integration/machines_test.go | 4 +- .../integration/maintenance_test.go | 4 +- .../integration/omniconfig_test.go | 4 +- .../integration/rolling_update_test.go | 4 +- .../integration/siderolink_test.go | 4 +- internal/integration/stats_test.go | 143 ++ internal/integration/suites_test.go | 1373 +++++++++++++ .../integration/talos_test.go | 14 +- .../integration/template_test.go | 4 +- .../integration}/testdata/cluster-1.tmpl.yaml | 0 .../integration}/testdata/cluster-2.tmpl.yaml | 0 .../testdata/sidero-labs-icon.svg | 0 .../integration/workload_proxy_test.go | 6 +- .../pkg/clientconfig/clientconfig.go | 0 .../pkg/clientconfig/register_key_debug.go | 0 .../pkg/clientconfig/register_key_no_debug.go | 0 57 files changed, 2715 insertions(+), 2804 deletions(-) delete mode 100644 cmd/integration-test/main.go delete mode 100644 cmd/integration-test/pkg/root.go delete mode 100644 cmd/integration-test/pkg/tests/infra.go delete mode 100644 cmd/integration-test/pkg/tests/stats.go delete mode 100644 
cmd/integration-test/pkg/tests/tests.go delete mode 100644 cmd/integration-test/pkg/tests/utils.go rename cmd/{integration-test => }/make-cookies/main.go (95%) rename cmd/integration-test/pkg/tests/auth.go => internal/integration/auth_test.go (99%) rename cmd/integration-test/pkg/tests/backup.go => internal/integration/backup_test.go (99%) rename cmd/integration-test/pkg/tests/blocks.go => internal/integration/blocks_test.go (68%) rename cmd/integration-test/pkg/tests/cleanup.go => internal/integration/cleanup_test.go (95%) rename cmd/integration-test/pkg/tests/cli.go => internal/integration/cli_test.go (99%) rename cmd/integration-test/pkg/tests/cluster.go => internal/integration/cluster_test.go (99%) rename cmd/integration-test/pkg/tests/common.go => internal/integration/common_test.go (57%) rename cmd/integration-test/pkg/tests/config_patch.go => internal/integration/config_patch_test.go (99%) rename cmd/integration-test/pkg/tests/extensions.go => internal/integration/extensions_test.go (99%) rename cmd/integration-test/pkg/tests/image.go => internal/integration/image_test.go (97%) create mode 100644 internal/integration/infra_test.go create mode 100644 internal/integration/integration_test.go rename cmd/integration-test/pkg/tests/kubernetes_node_audit.go => internal/integration/kubernetes_node_audit_test.go (93%) rename cmd/integration-test/pkg/tests/kubernetes.go => internal/integration/kubernetes_test.go (99%) rename cmd/integration-test/pkg/tests/machines.go => internal/integration/machines_test.go (99%) rename cmd/integration-test/pkg/tests/maintenance.go => internal/integration/maintenance_test.go (97%) rename cmd/integration-test/pkg/tests/omniconfig.go => internal/integration/omniconfig_test.go (93%) rename cmd/integration-test/pkg/tests/rolling_update.go => internal/integration/rolling_update_test.go (99%) rename cmd/integration-test/pkg/tests/siderolink.go => internal/integration/siderolink_test.go (98%) create mode 100644 internal/integration/stats_test.go create mode 100644 internal/integration/suites_test.go rename cmd/integration-test/pkg/tests/talos.go => internal/integration/talos_test.go (98%) rename cmd/integration-test/pkg/tests/template.go => internal/integration/template_test.go (99%) rename {cmd/integration-test/pkg/tests => internal/integration}/testdata/cluster-1.tmpl.yaml (100%) rename {cmd/integration-test/pkg/tests => internal/integration}/testdata/cluster-2.tmpl.yaml (100%) rename {cmd/integration-test/pkg/tests => internal/integration}/testdata/sidero-labs-icon.svg (100%) rename cmd/integration-test/pkg/tests/workload_proxy.go => internal/integration/workload_proxy_test.go (98%) rename {cmd/integration-test => internal}/pkg/clientconfig/clientconfig.go (100%) rename {cmd/integration-test => internal}/pkg/clientconfig/register_key_debug.go (100%) rename {cmd/integration-test => internal}/pkg/clientconfig/register_key_no_debug.go (100%) diff --git a/.dockerignore b/.dockerignore index 4e09770e..22e8416b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-10-31T13:15:39Z by kres 6d3cad4-dirty. +# Generated on 2025-05-30T17:31:23Z by kres 9f64b0d. 
* !frontend/src @@ -28,5 +28,6 @@ !CONTRIBUTING.md !DEVELOPMENT.md !README.md +!SECURITY.md !.markdownlint.json !.license-header.go.txt diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c5855dbe..9c49f19f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: default concurrency: @@ -108,39 +108,15 @@ jobs: - name: acompat run: | make acompat - - name: integration-test + - name: make-cookies run: | - make integration-test - - name: lint - run: | - make lint - - name: Login to registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - password: ${{ secrets.GITHUB_TOKEN }} - registry: ghcr.io - username: ${{ github.repository_owner }} - - name: image-integration-test - run: | - make image-integration-test - - name: push-omni-integration-test - if: github.event_name != 'pull_request' - env: - PLATFORM: linux/amd64,linux/arm64 - PUSH: "true" - run: | - make image-integration-test - - name: push-omni-integration-test-latest - if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' - env: - PLATFORM: linux/amd64,linux/arm64 - PUSH: "true" - run: | - make image-integration-test IMAGE_TAG=latest + make make-cookies - name: omni run: | make omni + - name: lint + run: | + make lint - name: Login to registry if: github.event_name != 'pull_request' uses: docker/login-action@v3 @@ -168,13 +144,38 @@ jobs: - name: omnictl run: | make omnictl + - name: integration-test + run: | + make integration-test + - name: Login to registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + password: ${{ secrets.GITHUB_TOKEN }} + registry: ghcr.io + username: ${{ github.repository_owner }} + - name: image-integration-test + run: | + make image-integration-test + - name: push-integration-test + if: github.event_name != 'pull_request' + env: + PUSH: "true" + run: | + make image-integration-test + - name: push-integration-test-latest + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + env: + PUSH: "true" + run: | + make image-integration-test IMAGE_TAG=latest - name: run-integration-test if: github.event_name == 'pull_request' env: INTEGRATION_RUN_E2E_TEST: "true" - INTEGRATION_TEST_ARGS: --test.run CleanState/|Auth/|DefaultCluster/|CLICommands/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|Auth|DefaultCluster|CLICommands)$ RUN_TALEMU_TESTS: "true" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -297,9 +298,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|EtcdBackupAndRestore + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|EtcdBackupAndRestore)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: 
--test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -376,9 +377,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ForcedMachineRemoval/|ReplaceControlPlanes/|ConfigPatching/|KubernetesNodeAudit/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ForcedMachineRemoval|ReplaceControlPlanes|ConfigPatching|KubernetesNodeAudit)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -455,9 +456,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|RollingUpdateParallelism + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|RollingUpdateParallelism)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -534,9 +535,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run 
TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -614,9 +615,9 @@ jobs: env: ENABLE_SECUREBOOT: "true" INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -693,9 +694,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ClusterTemplate/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ClusterTemplate)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -772,9 +773,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test @@ -851,9 +852,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|WorkloadProxy + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|WorkloadProxy)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run 
ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-backups-cron.yaml b/.github/workflows/e2e-backups-cron.yaml index 1c64c575..404081c9 100644 --- a/.github/workflows/e2e-backups-cron.yaml +++ b/.github/workflows/e2e-backups-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: e2e-backups-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|EtcdBackupAndRestore + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|EtcdBackupAndRestore)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-forced-removal-cron.yaml b/.github/workflows/e2e-forced-removal-cron.yaml index d20d3363..8aa72ca7 100644 --- a/.github/workflows/e2e-forced-removal-cron.yaml +++ b/.github/workflows/e2e-forced-removal-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. 
name: e2e-forced-removal-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ForcedMachineRemoval/|ReplaceControlPlanes/|ConfigPatching/|KubernetesNodeAudit/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ForcedMachineRemoval|ReplaceControlPlanes|ConfigPatching|KubernetesNodeAudit)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-scaling-cron.yaml b/.github/workflows/e2e-scaling-cron.yaml index 768b8112..3d1ff156 100644 --- a/.github/workflows/e2e-scaling-cron.yaml +++ b/.github/workflows/e2e-scaling-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: e2e-scaling-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|RollingUpdateParallelism + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|RollingUpdateParallelism)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-short-cron.yaml b/.github/workflows/e2e-short-cron.yaml index 68d6d3b5..dd548d15 100644 --- a/.github/workflows/e2e-short-cron.yaml +++ b/.github/workflows/e2e-short-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. 
name: e2e-short-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-short-secureboot-cron.yaml b/.github/workflows/e2e-short-secureboot-cron.yaml index 258bd217..19ab09b2 100644 --- a/.github/workflows/e2e-short-secureboot-cron.yaml +++ b/.github/workflows/e2e-short-secureboot-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: e2e-short-secureboot-cron concurrency: @@ -62,9 +62,9 @@ jobs: env: ENABLE_SECUREBOOT: "true" INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-templates-cron.yaml b/.github/workflows/e2e-templates-cron.yaml index 94efccdd..e4d960a4 100644 --- a/.github/workflows/e2e-templates-cron.yaml +++ b/.github/workflows/e2e-templates-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. 
name: e2e-templates-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|ClusterTemplate/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|ClusterTemplate)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-upgrades-cron.yaml b/.github/workflows/e2e-upgrades-cron.yaml index cc8132b6..97484433 100644 --- a/.github/workflows/e2e-upgrades-cron.yaml +++ b/.github/workflows/e2e-upgrades-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: e2e-upgrades-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/ + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/e2e-workload-proxy-cron.yaml b/.github/workflows/e2e-workload-proxy-cron.yaml index f69d5c7b..78c3dca9 100644 --- a/.github/workflows/e2e-workload-proxy-cron.yaml +++ b/.github/workflows/e2e-workload-proxy-cron.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-22T17:54:46Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. 
name: e2e-workload-proxy-cron concurrency: @@ -61,9 +61,9 @@ jobs: - name: run-integration-test env: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: --test.run CleanState/|WorkloadProxy + INTEGRATION_TEST_ARGS: --test.run TestIntegration/Suites/(CleanState|WorkloadProxy)$ RUN_TALEMU_TESTS: "false" - TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/ + TALEMU_TEST_ARGS: --test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$ WITH_DEBUG: "true" run: | sudo -E make run-integration-test diff --git a/.github/workflows/helm.yaml b/.github/workflows/helm.yaml index 06d32961..f9042c89 100644 --- a/.github/workflows/helm.yaml +++ b/.github/workflows/helm.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-07T09:19:30Z by kres 5ad3e5f. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. name: helm concurrency: diff --git a/.github/workflows/slack-notify.yaml b/.github/workflows/slack-notify.yaml index fe2c1792..4eff2e74 100644 --- a/.github/workflows/slack-notify.yaml +++ b/.github/workflows/slack-notify.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-11-28T14:39:19Z by kres 232fe63. +# Generated on 2025-05-30T19:29:08Z by kres 9f64b0d-dirty. name: slack-notify "on": diff --git a/.golangci.yml b/.golangci.yml index 4f8b1972..c407d3f0 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-05T10:42:38Z by kres 1a0156b. +# Generated on 2025-05-30T19:29:08Z by kres 9f64b0d-dirty. 
version: "2" diff --git a/.kres.yaml b/.kres.yaml index 5f5090d6..ec7d1391 100644 --- a/.kres.yaml +++ b/.kres.yaml @@ -9,11 +9,36 @@ name: acompat spec: disableImage: true --- +kind: auto.CommandConfig +name: make-cookies +spec: + disableImage: true +--- kind: auto.Helm spec: enabled: true chartDir: deploy/helm/omni --- +kind: auto.IntegrationTests +spec: + tests: + - path: internal/integration + name: integration-test + enableDockerImage: true + outputs: + linux-amd64: + GOOS: linux + GOARCH: amd64 + linux-arm64: + GOOS: linux + GOARCH: arm64 + darwin-amd64: + GOOS: darwin + GOARCH: amd64 + darwin-arm64: + GOOS: darwin + GOARCH: arm64 +--- kind: common.GHWorkflow spec: customRunners: @@ -187,8 +212,8 @@ spec: environment: WITH_DEBUG: "true" INTEGRATION_RUN_E2E_TEST: "true" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|Auth/|DefaultCluster/|CLICommands/" - TALEMU_TEST_ARGS: "--test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/|ClusterTemplate/|ScaleUpAndDownAutoProvisionMachineSets/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|Auth|DefaultCluster|CLICommands)$" + TALEMU_TEST_ARGS: "--test.run TestIntegration/Suites/(ImmediateClusterDestruction|EncryptedCluster|SinglenodeCluster|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade|ClusterTemplate|ScaleUpAndDownAutoProvisionMachineSets)$" RUN_TALEMU_TESTS: true jobs: - name: e2e-short-secureboot @@ -200,7 +225,7 @@ spec: - integration/e2e-short-secureboot environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$" RUN_TALEMU_TESTS: false ENABLE_SECUREBOOT: true - name: e2e-short @@ -213,7 +238,7 @@ spec: - integration/e2e-short environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|TalosImageGeneration/|ImmediateClusterDestruction/|DefaultCluster/|EncryptedCluster/|SinglenodeCluster/|Auth/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|TalosImageGeneration|ImmediateClusterDestruction|DefaultCluster|EncryptedCluster|SinglenodeCluster|Auth)$" RUN_TALEMU_TESTS: false - name: e2e-scaling crons: @@ -225,7 +250,7 @@ spec: - integration/e2e-scaling environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|RollingUpdateParallelism" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|ScaleUpAndDown|ScaleUpAndDownMachineClassBasedMachineSets|RollingUpdateParallelism)$" RUN_TALEMU_TESTS: false - name: e2e-forced-removal crons: @@ -237,7 +262,7 @@ spec: - integration/e2e-forced-removal environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|ForcedMachineRemoval/|ReplaceControlPlanes/|ConfigPatching/|KubernetesNodeAudit/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|ForcedMachineRemoval|ReplaceControlPlanes|ConfigPatching|KubernetesNodeAudit)$" RUN_TALEMU_TESTS: false - name: e2e-upgrades crons: @@ -249,7 +274,7 @@ spec: - 
integration/e2e-upgrades environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceUpgrade/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|TalosUpgrades|KubernetesUpgrades|MaintenanceUpgrade)$" RUN_TALEMU_TESTS: false - name: e2e-templates crons: @@ -261,7 +286,7 @@ spec: - integration/e2e-templates environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|ClusterTemplate/" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|ClusterTemplate)$" RUN_TALEMU_TESTS: false - name: e2e-backups crons: @@ -273,7 +298,7 @@ spec: - integration/e2e-backups environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|EtcdBackupAndRestore" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|EtcdBackupAndRestore)$" RUN_TALEMU_TESTS: false - name: e2e-workload-proxy crons: @@ -285,7 +310,7 @@ spec: - integration/e2e-workload-proxy environmentOverride: INTEGRATION_RUN_E2E_TEST: "false" - INTEGRATION_TEST_ARGS: "--test.run CleanState/|WorkloadProxy" + INTEGRATION_TEST_ARGS: "--test.run TestIntegration/Suites/(CleanState|WorkloadProxy)$" RUN_TALEMU_TESTS: false --- kind: common.Build @@ -348,17 +373,6 @@ spec: GOOS: windows GOARCH: amd64 --- -kind: golang.Build -name: integration-test -spec: - outputs: - linux-amd64: - GOOS: linux - GOARCH: amd64 - linux-arm64: - GOOS: linux - GOARCH: arm64 ---- kind: golang.Generate spec: versionPackagePath: internal/version @@ -529,13 +543,6 @@ spec: - omni-* --- kind: common.Image -name: image-integration-test -spec: - extraEnvironment: - PLATFORM: linux/amd64,linux/arm64 - imageName: "omni-integration-test" ---- -kind: common.Image name: image-omni spec: extraEnvironment: diff --git a/Dockerfile b/Dockerfile index e07b6f9d..53bf28f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-20T20:30:25Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. ARG JS_TOOLCHAIN ARG TOOLCHAIN @@ -29,6 +29,7 @@ COPY ./CHANGELOG.md ./CHANGELOG.md COPY ./CONTRIBUTING.md ./CONTRIBUTING.md COPY ./DEVELOPMENT.md ./DEVELOPMENT.md COPY ./README.md ./README.md +COPY ./SECURITY.md ./SECURITY.md RUN bunx markdownlint --ignore "CHANGELOG.md" --ignore "**/node_modules/**" --ignore '**/hack/chglog/**' --rules sentences-per-line . 
# collects proto specs @@ -337,29 +338,65 @@ ARG SHA ARG TAG RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg go build ${GO_BUILDFLAGS} -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=acompat -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /acompat-linux-amd64 -# builds integration-test-linux-amd64 -FROM base AS integration-test-linux-amd64-build +# builds integration-test-darwin-amd64 +FROM base AS integration-test-darwin-amd64-build COPY --from=generate / / COPY --from=embed-generate / / -WORKDIR /src/cmd/integration-test +WORKDIR /src/internal/integration ARG GO_BUILDFLAGS ARG GO_LDFLAGS ARG VERSION_PKG="internal/version" ARG SHA ARG TAG -RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=amd64 GOOS=linux go build ${GO_BUILDFLAGS} -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-linux-amd64 +RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=amd64 GOOS=darwin go test -c -covermode=atomic -tags integration,sidero.debug -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-darwin-amd64 + +# builds integration-test-darwin-arm64 +FROM base AS integration-test-darwin-arm64-build +COPY --from=generate / / +COPY --from=embed-generate / / +WORKDIR /src/internal/integration +ARG GO_BUILDFLAGS +ARG GO_LDFLAGS +ARG VERSION_PKG="internal/version" +ARG SHA +ARG TAG +RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=arm64 GOOS=darwin go test -c -covermode=atomic -tags integration,sidero.debug -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-darwin-arm64 + +# builds integration-test-linux-amd64 +FROM base AS integration-test-linux-amd64-build +COPY --from=generate / / +COPY --from=embed-generate / / +WORKDIR /src/internal/integration +ARG GO_BUILDFLAGS +ARG GO_LDFLAGS +ARG VERSION_PKG="internal/version" +ARG SHA +ARG TAG +RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=amd64 GOOS=linux go test -c -covermode=atomic -tags integration,sidero.debug -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-linux-amd64 # builds integration-test-linux-arm64 FROM base AS integration-test-linux-arm64-build COPY --from=generate / / COPY --from=embed-generate / / -WORKDIR /src/cmd/integration-test +WORKDIR /src/internal/integration ARG GO_BUILDFLAGS ARG GO_LDFLAGS ARG VERSION_PKG="internal/version" ARG SHA ARG TAG -RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=arm64 GOOS=linux go build ${GO_BUILDFLAGS} -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-linux-arm64 +RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg GOARCH=arm64 GOOS=linux go test -c 
-covermode=atomic -tags integration,sidero.debug -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=integration-test -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /integration-test-linux-arm64 + +# builds make-cookies-linux-amd64 +FROM base AS make-cookies-linux-amd64-build +COPY --from=generate / / +COPY --from=embed-generate / / +WORKDIR /src/cmd/make-cookies +ARG GO_BUILDFLAGS +ARG GO_LDFLAGS +ARG VERSION_PKG="internal/version" +ARG SHA +ARG TAG +RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build --mount=type=cache,target=/go/pkg,id=omni/go/pkg go build ${GO_BUILDFLAGS} -ldflags "${GO_LDFLAGS} -X ${VERSION_PKG}.Name=make-cookies -X ${VERSION_PKG}.SHA=${SHA} -X ${VERSION_PKG}.Tag=${TAG}" -o /make-cookies-linux-amd64 # builds omni-darwin-amd64 FROM base AS omni-darwin-amd64-build @@ -472,12 +509,21 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=omni/root/.cache/go-build FROM scratch AS acompat-linux-amd64 COPY --from=acompat-linux-amd64-build /acompat-linux-amd64 /acompat-linux-amd64 +FROM scratch AS integration-test-darwin-amd64 +COPY --from=integration-test-darwin-amd64-build /integration-test-darwin-amd64 /integration-test-darwin-amd64 + +FROM scratch AS integration-test-darwin-arm64 +COPY --from=integration-test-darwin-arm64-build /integration-test-darwin-arm64 /integration-test-darwin-arm64 + FROM scratch AS integration-test-linux-amd64 COPY --from=integration-test-linux-amd64-build /integration-test-linux-amd64 /integration-test-linux-amd64 FROM scratch AS integration-test-linux-arm64 COPY --from=integration-test-linux-arm64-build /integration-test-linux-arm64 /integration-test-linux-arm64 +FROM scratch AS make-cookies-linux-amd64 +COPY --from=make-cookies-linux-amd64-build /make-cookies-linux-amd64 /make-cookies-linux-amd64 + FROM scratch AS omni-darwin-amd64 COPY --from=omni-darwin-amd64-build /omni-darwin-amd64 /omni-darwin-amd64 @@ -513,9 +559,16 @@ COPY --from=acompat-linux-amd64 / / FROM integration-test-linux-${TARGETARCH} AS integration-test FROM scratch AS integration-test-all +COPY --from=integration-test-darwin-amd64 / / +COPY --from=integration-test-darwin-arm64 / / COPY --from=integration-test-linux-amd64 / / COPY --from=integration-test-linux-arm64 / / +FROM make-cookies-linux-${TARGETARCH} AS make-cookies + +FROM scratch AS make-cookies-all +COPY --from=make-cookies-linux-amd64 / / + FROM omni-linux-${TARGETARCH} AS omni FROM scratch AS omni-all @@ -536,7 +589,6 @@ COPY --from=omnictl-windows-amd64.exe / / FROM scratch AS image-integration-test ARG TARGETARCH COPY --from=integration-test integration-test-linux-${TARGETARCH} /integration-test -COPY --from=integration-test integration-test-linux-${TARGETARCH} /integration-test COPY --from=image-fhs / / COPY --from=image-ca-certificates / / LABEL org.opencontainers.image.source=https://github.com/siderolabs/omni diff --git a/Makefile b/Makefile index c0c317f2..86b34bd8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-20T20:30:25Z by kres 9f64b0d. +# Generated on 2025-06-02T21:18:31Z by kres 99b55ad-dirty. 
# common variables @@ -148,7 +148,7 @@ else GO_LDFLAGS += -s endif -all: unit-tests-frontend lint-eslint frontend unit-tests-client unit-tests acompat integration-test image-integration-test omni image-omni omnictl helm lint +all: unit-tests-frontend lint-eslint frontend unit-tests-client unit-tests acompat make-cookies omni image-omni omnictl helm integration-test image-integration-test lint $(ARTIFACTS): ## Creates artifacts directory. @mkdir -p $(ARTIFACTS) @@ -256,33 +256,15 @@ acompat-linux-amd64: $(ARTIFACTS)/acompat-linux-amd64 ## Builds executable for .PHONY: acompat acompat: acompat-linux-amd64 ## Builds executables for acompat. -.PHONY: $(ARTIFACTS)/integration-test-linux-amd64 -$(ARTIFACTS)/integration-test-linux-amd64: - @$(MAKE) local-integration-test-linux-amd64 DEST=$(ARTIFACTS) +.PHONY: $(ARTIFACTS)/make-cookies-linux-amd64 +$(ARTIFACTS)/make-cookies-linux-amd64: + @$(MAKE) local-make-cookies-linux-amd64 DEST=$(ARTIFACTS) -.PHONY: integration-test-linux-amd64 -integration-test-linux-amd64: $(ARTIFACTS)/integration-test-linux-amd64 ## Builds executable for integration-test-linux-amd64. +.PHONY: make-cookies-linux-amd64 +make-cookies-linux-amd64: $(ARTIFACTS)/make-cookies-linux-amd64 ## Builds executable for make-cookies-linux-amd64. -.PHONY: $(ARTIFACTS)/integration-test-linux-arm64 -$(ARTIFACTS)/integration-test-linux-arm64: - @$(MAKE) local-integration-test-linux-arm64 DEST=$(ARTIFACTS) - -.PHONY: integration-test-linux-arm64 -integration-test-linux-arm64: $(ARTIFACTS)/integration-test-linux-arm64 ## Builds executable for integration-test-linux-arm64. - -.PHONY: integration-test -integration-test: integration-test-linux-amd64 integration-test-linux-arm64 ## Builds executables for integration-test. - -.PHONY: lint-markdown -lint-markdown: ## Runs markdownlint. - @$(MAKE) target-$@ - -.PHONY: lint -lint: lint-eslint lint-golangci-lint-client lint-gofumpt-client lint-govulncheck-client lint-golangci-lint lint-gofumpt lint-govulncheck lint-markdown ## Run all linters for the project. - -.PHONY: image-integration-test -image-integration-test: ## Builds image for omni-integration-test. - @$(MAKE) registry-$@ IMAGE_NAME="omni-integration-test" +.PHONY: make-cookies +make-cookies: make-cookies-linux-amd64 ## Builds executables for make-cookies. .PHONY: $(ARTIFACTS)/omni-darwin-amd64 $(ARTIFACTS)/omni-darwin-amd64: @@ -315,6 +297,13 @@ omni-linux-arm64: $(ARTIFACTS)/omni-linux-arm64 ## Builds executable for omni-l .PHONY: omni omni: omni-darwin-amd64 omni-darwin-arm64 omni-linux-amd64 omni-linux-arm64 ## Builds executables for omni. +.PHONY: lint-markdown +lint-markdown: ## Runs markdownlint. + @$(MAKE) target-$@ + +.PHONY: lint +lint: lint-eslint lint-golangci-lint-client lint-gofumpt-client lint-govulncheck-client lint-golangci-lint lint-gofumpt lint-govulncheck lint-markdown ## Run all linters for the project. + .PHONY: image-omni image-omni: ## Builds image for omni. 
@$(MAKE) registry-$@ IMAGE_NAME="omni" @@ -366,6 +355,41 @@ helm-release: helm ## Release helm chart @helm push $(ARTIFACTS)/omni-*.tgz oci://$(HELMREPO) 2>&1 | tee $(ARTIFACTS)/.digest @cosign sign --yes $(COSING_ARGS) $(HELMREPO)/omni@$$(cat $(ARTIFACTS)/.digest | awk -F "[, ]+" '/Digest/{print $$NF}') +.PHONY: $(ARTIFACTS)/integration-test-darwin-amd64 +$(ARTIFACTS)/integration-test-darwin-amd64: + @$(MAKE) local-integration-test-darwin-amd64 DEST=$(ARTIFACTS) + +.PHONY: integration-test-darwin-amd64 +integration-test-darwin-amd64: $(ARTIFACTS)/integration-test-darwin-amd64 ## Builds executable for integration-test-darwin-amd64. + +.PHONY: $(ARTIFACTS)/integration-test-darwin-arm64 +$(ARTIFACTS)/integration-test-darwin-arm64: + @$(MAKE) local-integration-test-darwin-arm64 DEST=$(ARTIFACTS) + +.PHONY: integration-test-darwin-arm64 +integration-test-darwin-arm64: $(ARTIFACTS)/integration-test-darwin-arm64 ## Builds executable for integration-test-darwin-arm64. + +.PHONY: $(ARTIFACTS)/integration-test-linux-amd64 +$(ARTIFACTS)/integration-test-linux-amd64: + @$(MAKE) local-integration-test-linux-amd64 DEST=$(ARTIFACTS) + +.PHONY: integration-test-linux-amd64 +integration-test-linux-amd64: $(ARTIFACTS)/integration-test-linux-amd64 ## Builds executable for integration-test-linux-amd64. + +.PHONY: $(ARTIFACTS)/integration-test-linux-arm64 +$(ARTIFACTS)/integration-test-linux-arm64: + @$(MAKE) local-integration-test-linux-arm64 DEST=$(ARTIFACTS) + +.PHONY: integration-test-linux-arm64 +integration-test-linux-arm64: $(ARTIFACTS)/integration-test-linux-arm64 ## Builds executable for integration-test-linux-arm64. + +.PHONY: integration-test +integration-test: integration-test-darwin-amd64 integration-test-darwin-arm64 integration-test-linux-amd64 integration-test-linux-arm64 ## Builds executables for integration-test. + +.PHONY: image-integration-test +image-integration-test: ## Builds image for integration-test. + @$(MAKE) registry-$@ IMAGE_NAME="integration-test" + .PHONY: dev-server dev-server: hack/dev-server.sh diff --git a/SECURITY.md b/SECURITY.md index 6fd6ecd2..ecc0959c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,16 +1,16 @@ - - # Reporting Security Issues We appreciate your efforts to disclose your findings responsibly, and will make every effort to acknowledge your contributions. To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/siderolabs/omni/security/advisories/new) tab. -The Sidero Labs team will send a response indicating the next steps in handling your report. After the initial response to your report, the security team will keep you informed of the progress toward a fix and a full announcement and may request additional information or guidance. The expected response time is within 3 business days, and the fix is expected to be delivered within 30 days. +The Sidero Labs team will send a response indicating the next steps in handling your report. +After the initial response to your report, the security team will keep you informed of the progress toward a fix and a full announcement and may request additional information or guidance. +The expected response time is within 3 business days, and the fix is expected to be delivered within 30 days. ## Supported Releases -The Sidero Labs team will only provide security updates for the two latest minor releases of Omni, unless you have a support contract that specifies otherwise. If you are using an older version of Omni, we recommend upgrading to the latest release. 
+The Sidero Labs team will only provide security updates for the two latest minor releases of Omni, unless you have a support contract that specifies otherwise. +If you are using an older version of Omni, we recommend upgrading to the latest release. For example, if the latest release is `v0.49.1`, the supported releases are `v0.48.x` and `v0.49.x`. - diff --git a/client/.golangci.yml b/client/.golangci.yml index 4f8b1972..c407d3f0 100644 --- a/client/.golangci.yml +++ b/client/.golangci.yml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2025-05-05T10:42:38Z by kres 1a0156b. +# Generated on 2025-05-30T19:29:08Z by kres 9f64b0d-dirty. version: "2" diff --git a/cmd/integration-test/main.go b/cmd/integration-test/main.go deleted file mode 100644 index d3e6c83a..00000000 --- a/cmd/integration-test/main.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. - -// Package main provides the entrypoint for the omni-integration-test binary. -package main - -import ( - "os" - - _ "github.com/siderolabs/omni/cmd/acompat" // this package should always be imported first for init->set env to work - "github.com/siderolabs/omni/cmd/integration-test/pkg" -) - -func main() { - if err := pkg.RootCmd().Execute(); err != nil { - os.Exit(1) - } -} diff --git a/cmd/integration-test/pkg/root.go b/cmd/integration-test/pkg/root.go deleted file mode 100644 index 69ce2763..00000000 --- a/cmd/integration-test/pkg/root.go +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. - -// Package pkg provides the root command for the omni-integration-test binary. -package pkg - -import ( - "context" - "errors" - "fmt" - "io" - "net/url" - "os" - "os/exec" - "os/signal" - "strconv" - "sync" - "time" - - "github.com/mattn/go-shellwords" - "github.com/spf13/cobra" - "gopkg.in/yaml.v3" - - "github.com/siderolabs/omni/client/pkg/compression" - clientconsts "github.com/siderolabs/omni/client/pkg/constants" - "github.com/siderolabs/omni/cmd/integration-test/pkg/clientconfig" - "github.com/siderolabs/omni/cmd/integration-test/pkg/tests" - "github.com/siderolabs/omni/internal/pkg/constants" -) - -// rootCmd represents the base command when called without any subcommands. 
-var rootCmd = &cobra.Command{ - Use: "omni-integration-test", - Short: "Omni integration test runner.", - Long: ``, - PersistentPreRunE: func(*cobra.Command, []string) error { - return compression.InitConfig(true) - }, - RunE: func(*cobra.Command, []string) error { - return withContext(func(ctx context.Context) error { - // hacky hack - os.Args = append(os.Args[0:1], "-test.v", "-test.parallel", strconv.FormatInt(rootCmdFlags.parallel, 10), "-test.timeout", rootCmdFlags.testsTimeout.String()) - - testOptions := tests.Options{ - RunTestPattern: rootCmdFlags.runTestPattern, - - ExpectedMachines: rootCmdFlags.expectedMachines, - CleanupLinks: rootCmdFlags.cleanupLinks, - RunStatsCheck: rootCmdFlags.runStatsCheck, - SkipExtensionsCheckOnCreate: rootCmdFlags.skipExtensionsCheckOnCreate, - - MachineOptions: rootCmdFlags.machineOptions, - AnotherTalosVersion: rootCmdFlags.anotherTalosVersion, - AnotherKubernetesVersion: rootCmdFlags.anotherKubernetesVersion, - OmnictlPath: rootCmdFlags.omnictlPath, - ScalingTimeout: rootCmdFlags.scalingTimeout, - OutputDir: rootCmdFlags.outputDir, - } - - if rootCmdFlags.provisionConfigFile != "" { - f, err := os.Open(rootCmdFlags.provisionConfigFile) - if err != nil { - return fmt.Errorf("failed to open provision config file %q: %w", rootCmdFlags.provisionConfigFile, err) - } - - decoder := yaml.NewDecoder(f) - - for { - var cfg tests.MachineProvisionConfig - - if err = decoder.Decode(&cfg); err != nil { - if errors.Is(err, io.EOF) { - break - } - - return err - } - - testOptions.ProvisionConfigs = append(testOptions.ProvisionConfigs, cfg) - } - } else { - testOptions.ProvisionConfigs = append(testOptions.ProvisionConfigs, - tests.MachineProvisionConfig{ - MachineCount: rootCmdFlags.provisionMachinesCount, - Provider: tests.MachineProviderConfig{ - ID: rootCmdFlags.infraProvider, - Data: rootCmdFlags.providerData, - }, - }, - ) - } - - if rootCmdFlags.restartAMachineScript != "" { - parsedScript, err := shellwords.Parse(rootCmdFlags.restartAMachineScript) - if err != nil { - return fmt.Errorf("error parsing restart a machine script: %w", err) - } - - testOptions.RestartAMachineFunc = func(ctx context.Context, uuid string) error { - return execCmd(ctx, parsedScript, uuid) - } - } - - if rootCmdFlags.wipeAMachineScript != "" { - parsedScript, err := shellwords.Parse(rootCmdFlags.wipeAMachineScript) - if err != nil { - return fmt.Errorf("error parsing wipe a machine script: %w", err) - } - - testOptions.WipeAMachineFunc = func(ctx context.Context, uuid string) error { - return execCmd(ctx, parsedScript, uuid) - } - } - - if rootCmdFlags.freezeAMachineScript != "" { - parsedScript, err := shellwords.Parse(rootCmdFlags.freezeAMachineScript) - if err != nil { - return fmt.Errorf("error parsing freeze a machine script: %w", err) - } - - testOptions.FreezeAMachineFunc = func(ctx context.Context, uuid string) error { - return execCmd(ctx, parsedScript, uuid) - } - } - - u, err := url.Parse(rootCmdFlags.endpoint) - if err != nil { - return errors.New("error parsing endpoint") - } - - if u.Scheme == "grpc" { - u.Scheme = "http" - } - - testOptions.HTTPEndpoint = u.String() - - clientConfig := clientconfig.New(rootCmdFlags.endpoint) - defer clientConfig.Close() //nolint:errcheck - - return tests.Run(ctx, clientConfig, testOptions) - }) - }, -} - -func execCmd(ctx context.Context, parsedScript []string, args ...string) error { - cmd := exec.CommandContext(ctx, parsedScript[0], append(parsedScript[1:], args...)...) 
- cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd.Run() -} - -//nolint:govet -var rootCmdFlags struct { - endpoint string - runTestPattern string - infraProvider string - providerData string - - provisionMachinesCount int - expectedMachines int - expectedBareMetalMachines int - parallel int64 - cleanupLinks bool - runStatsCheck bool - skipExtensionsCheckOnCreate bool - - testsTimeout time.Duration - scalingTimeout time.Duration - - restartAMachineScript string - wipeAMachineScript string - freezeAMachineScript string - anotherTalosVersion string - anotherKubernetesVersion string - omnictlPath string - provisionConfigFile string - outputDir string - - machineOptions tests.MachineOptions -} - -// RootCmd returns the root command. -func RootCmd() *cobra.Command { return onceInit() } - -var onceInit = sync.OnceValue(func() *cobra.Command { - rootCmd.PersistentFlags().StringVar(&rootCmdFlags.endpoint, "endpoint", "grpc://127.0.0.1:8080", "The endpoint of the Omni API.") - rootCmd.Flags().StringVar(&rootCmdFlags.runTestPattern, "test.run", "", "tests to run (regular expression)") - rootCmd.Flags().IntVar(&rootCmdFlags.expectedMachines, "expected-machines", 4, "minimum number of machines expected") - rootCmd.Flags().StringVar(&rootCmdFlags.restartAMachineScript, "restart-a-machine-script", "hack/test/restart-a-vm.sh", "a script to run to restart a machine by UUID (optional)") - rootCmd.Flags().StringVar(&rootCmdFlags.wipeAMachineScript, "wipe-a-machine-script", "hack/test/wipe-a-vm.sh", "a script to run to wipe a machine by UUID (optional)") - rootCmd.Flags().StringVar(&rootCmdFlags.freezeAMachineScript, "freeze-a-machine-script", "hack/test/freeze-a-vm.sh", "a script to run to freeze a machine by UUID (optional)") - rootCmd.Flags().StringVar(&rootCmdFlags.omnictlPath, "omnictl-path", "", "omnictl CLI script path (optional)") - rootCmd.Flags().StringVar(&rootCmdFlags.anotherTalosVersion, "another-talos-version", - constants.AnotherTalosVersion, - "Talos version for upgrade test", - ) - rootCmd.Flags().StringVar( - &rootCmdFlags.machineOptions.TalosVersion, - "talos-version", - clientconsts.DefaultTalosVersion, - "installer version for workload clusters", - ) - rootCmd.Flags().StringVar(&rootCmdFlags.machineOptions.KubernetesVersion, "kubernetes-version", constants.DefaultKubernetesVersion, "Kubernetes version for workload clusters") - rootCmd.Flags().StringVar(&rootCmdFlags.anotherKubernetesVersion, "another-kubernetes-version", constants.AnotherKubernetesVersion, "Kubernetes version for upgrade tests") - rootCmd.Flags().Int64VarP(&rootCmdFlags.parallel, "parallel", "p", 4, "tests parallelism") - rootCmd.Flags().DurationVarP(&rootCmdFlags.testsTimeout, "timeout", "t", time.Hour, "tests global timeout") - rootCmd.Flags().BoolVar(&rootCmdFlags.cleanupLinks, "cleanup-links", false, "remove all links after the tests are complete") - rootCmd.Flags().BoolVar(&rootCmdFlags.runStatsCheck, "run-stats-check", false, "runs stats check after the test is complete") - rootCmd.Flags().IntVar(&rootCmdFlags.provisionMachinesCount, "provision-machines", 0, "provisions machines through the infrastructure provider") - rootCmd.Flags().StringVar(&rootCmdFlags.infraProvider, "infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines") - rootCmd.Flags().StringVar(&rootCmdFlags.providerData, "provider-data", "{}", "the infra provider machine template data to use") - rootCmd.Flags().DurationVar(&rootCmdFlags.scalingTimeout, "scale-timeout", time.Second*150, "scale 
up test timeout") - rootCmd.Flags().StringVar(&rootCmdFlags.provisionConfigFile, "provision-config-file", "", "provision machines with the more complicated configuration") - rootCmd.Flags().BoolVar(&rootCmdFlags.skipExtensionsCheckOnCreate, "skip-extensions-check-on-create", false, - "disables checking for hello-world-service extension on the machine allocation and in the upgrade tests") - rootCmd.Flags().StringVar(&rootCmdFlags.outputDir, "output-dir", "/tmp/integration-test", "output directory for the files generated by the test, e.g., the support bundles") - - rootCmd.MarkFlagsMutuallyExclusive("provision-machines", "provision-config-file") - - return rootCmd -}) - -// withContext wraps with CLI context. -func withContext(f func(ctx context.Context) error) error { - ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) - defer stop() - - return f(ctx) -} diff --git a/cmd/integration-test/pkg/tests/infra.go b/cmd/integration-test/pkg/tests/infra.go deleted file mode 100644 index 822d1055..00000000 --- a/cmd/integration-test/pkg/tests/infra.go +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. - -package tests - -import ( - "context" - "fmt" - "slices" - "testing" - "time" - - "github.com/cosi-project/runtime/pkg/resource" - "github.com/cosi-project/runtime/pkg/resource/rtestutils" - "github.com/cosi-project/runtime/pkg/safe" - "github.com/cosi-project/runtime/pkg/state" - "github.com/siderolabs/go-retry/retry" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - "go.uber.org/zap/zaptest" - - "github.com/siderolabs/omni/client/api/omni/specs" - "github.com/siderolabs/omni/client/pkg/client" - "github.com/siderolabs/omni/client/pkg/omni/resources" - "github.com/siderolabs/omni/client/pkg/omni/resources/infra" - "github.com/siderolabs/omni/client/pkg/omni/resources/omni" - "github.com/siderolabs/omni/client/pkg/omni/resources/siderolink" -) - -// AssertMachinesShouldBeProvisioned creates a machine request set and waits until all requests are fulfilled. 
-// -//nolint:gocognit -func AssertMachinesShouldBeProvisioned(testCtx context.Context, client *client.Client, cfg MachineProvisionConfig, machineRequestSetName, - talosVersion string, -) TestFunc { - return func(t *testing.T) { - ctx, cancel := context.WithTimeout(testCtx, time.Minute*5) - defer cancel() - - rtestutils.AssertResources(ctx, t, client.Omni().State(), []string{cfg.Provider.ID}, func(*infra.ProviderStatus, *assert.Assertions) {}) - - machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, client.Omni().State(), machineRequestSetName) - - if !state.IsNotFoundError(err) { - require.NoError(t, err) - } - - if machineRequestSet != nil { - rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName}) - } - - machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName) - - machineRequestSet.TypedSpec().Value.Extensions = []string{ - "siderolabs/" + HelloWorldServiceExtensionName, - } - - machineRequestSet.TypedSpec().Value.ProviderId = cfg.Provider.ID - machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion - machineRequestSet.TypedSpec().Value.ProviderData = cfg.Provider.Data - machineRequestSet.TypedSpec().Value.MachineCount = int32(cfg.MachineCount) - - require.NoError(t, client.Omni().State().Create(ctx, machineRequestSet)) - - var resources safe.List[*infra.MachineRequestStatus] - - err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error { - resources, err = safe.ReaderListAll[*infra.MachineRequestStatus](ctx, client.Omni().State(), - state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)), - ) - if err != nil { - return err - } - - if resources.Len() != cfg.MachineCount { - return retry.ExpectedErrorf("provision machine count is %d, expected %d", resources.Len(), cfg.MachineCount) - } - - return nil - }) - - require.NoError(t, err) - - err = retry.Constant(time.Minute*5).RetryWithContext(ctx, func(ctx context.Context) error { - var machines safe.List[*omni.MachineStatus] - - machines, err = safe.ReaderListAll[*omni.MachineStatus](ctx, client.Omni().State()) - if err != nil { - return err - } - - if machines.Len() < cfg.MachineCount { - return retry.ExpectedErrorf("links count is %d, expected at least %d", machines.Len(), cfg.MachineCount) - } - - for r := range resources.All() { - requestedMachines := machines.FilterLabelQuery(resource.LabelEqual(omni.LabelMachineRequest, r.Metadata().ID())) - - if requestedMachines.Len() == 0 { - return retry.ExpectedErrorf("machine request %q doesn't have the related link", r.Metadata().ID()) - } - - if requestedMachines.Len() != 1 { - return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID()) - } - - m := requestedMachines.Get(0) - if m.TypedSpec().Value.Hardware == nil { - return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID()) - } - } - - return nil - }) - - require.NoError(t, err) - } -} - -// AssertMachinesShouldBeDeprovisioned removes the machine request set and checks that all related links were deleted. 
-func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client, machineRequestSetName string) TestFunc { - return func(t *testing.T) { - ctx, cancel := context.WithTimeout(testCtx, time.Minute*5) - defer cancel() - - requestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](ctx, t, client.Omni().State(), - state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)), - ) - - links, err := safe.ReaderListAll[*siderolink.Link](ctx, client.Omni().State()) - - require.NoError(t, err) - - linkIDs := make([]string, 0, len(requestIDs)) - - for l := range links.All() { - mr, ok := l.Metadata().Labels().Get(omni.LabelMachineRequest) - if !ok { - continue - } - - if slices.Index(requestIDs, mr) != -1 { - linkIDs = append(linkIDs, l.Metadata().ID()) - } - } - - rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName}) - - for _, id := range requestIDs { - rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), id) - } - - for _, id := range linkIDs { - rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), id) - } - } -} - -// AcceptInfraMachines asserts that there are a certain number of machines that are not accepted, provisioned by the static infra provider with the given ID. -// -// It then accepts them all and asserts that the states of various resources are updated as expected. -func AcceptInfraMachines(testCtx context.Context, omniState state.State, infraProviderID string, expectedCount int, disableKexec bool) TestFunc { - const disableKexecConfigPatch = `machine: - install: - extraKernelArgs: - - kexec_load_disabled=1 - sysctls: - kernel.kexec_load_disabled: "1"` - - return func(t *testing.T) { - logger := zaptest.NewLogger(t) - - ctx, cancel := context.WithTimeout(testCtx, time.Minute*10) - defer cancel() - - linksMap := make(map[string]*siderolink.Link, expectedCount) - - err := retry.Constant(time.Minute*10).RetryWithContext(ctx, func(ctx context.Context) error { - links, err := safe.ReaderListAll[*siderolink.Link](ctx, omniState) - if err != nil { - return err - } - - discoveredLinks := 0 - - for link := range links.All() { - providerID, ok := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) - if !ok { - continue - } - - if infraProviderID == providerID { - discoveredLinks++ - } - - linksMap[link.Metadata().ID()] = link - } - - if discoveredLinks != expectedCount { - return retry.ExpectedErrorf("expected %d static infra provider machines, got %d", expectedCount, discoveredLinks) - } - - return nil - }) - - require.NoError(t, err) - - // link count should match the expected count - require.Equal(t, expectedCount, len(linksMap)) - - ids := make([]resource.ID, 0, len(linksMap)) - - for id := range linksMap { - ids = append(ids, id) - - rtestutils.AssertResource(ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) { - assertion.Equal(specs.InfraMachineConfigSpec_PENDING, res.TypedSpec().Value.AcceptanceStatus) - }) - - rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id) - - rtestutils.AssertNoResource[*omni.Machine](ctx, t, omniState, id) - - // Accept the machine - infraMachineConfig := omni.NewInfraMachineConfig(resources.DefaultNamespace, id) - - infraMachineConfig.TypedSpec().Value.AcceptanceStatus = specs.InfraMachineConfigSpec_ACCEPTED - - if disableKexec { - infraMachineConfig.TypedSpec().Value.ExtraKernelArgs = "kexec_load_disabled=1" - } - - require.NoError(t, 
omniState.Create(ctx, infraMachineConfig)) - - if disableKexec { - disableKexecConfigPatchRes := omni.NewConfigPatch(resources.DefaultNamespace, fmt.Sprintf("500-%s-disable-kexec", id)) - - disableKexecConfigPatchRes.Metadata().Labels().Set(omni.LabelMachine, id) - - require.NoError(t, disableKexecConfigPatchRes.TypedSpec().Value.SetUncompressedData([]byte(disableKexecConfigPatch))) - require.NoError(t, omniState.Create(ctx, disableKexecConfigPatchRes)) - } - } - - logger.Info("accepted machines", zap.Reflect("infra_provider_id", infraProviderID), zap.Strings("machine_ids", ids)) - - // Assert that the infra.Machines are now marked as accepted - rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.Machine, assertion *assert.Assertions) { - assertion.Equal(specs.InfraMachineConfigSpec_ACCEPTED, res.TypedSpec().Value.AcceptanceStatus) - }) - - // Assert that omni.Machine resources are now created and marked as managed by the static infra provider - rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) { - _, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider) - - assertion.True(isManagedByStaticInfraProvider) - }) - - // Assert that omni.Machine resources are now created - rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) { - _, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider) - - assertion.True(isManagedByStaticInfraProvider) - }) - - // Assert that infra.MachineStatus resources are now created, powered off, marked as ready to use, and the machine labels are set on them - rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.MachineStatus, assertion *assert.Assertions) { - aVal, _ := res.Metadata().Labels().Get("a") - assertion.Equal("b", aVal) - - _, cOk := res.Metadata().Labels().Get("c") - assertion.True(cOk) - - assertion.Equal(specs.InfraMachineStatusSpec_POWER_STATE_OFF, res.TypedSpec().Value.PowerState) - assertion.True(res.TypedSpec().Value.ReadyToUse) - }) - - // Assert the infra provider labels on MachineStatus resources - rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.MachineStatus, assertion *assert.Assertions) { - link := linksMap[res.Metadata().ID()] - - infraProviderID, _ := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) - - aLabel := fmt.Sprintf(omni.InfraProviderLabelPrefixFormat, infraProviderID) + "a" - aVal, _ := res.Metadata().Labels().Get(aLabel) - - assertion.Equal("b", aVal) - - cLabel := fmt.Sprintf(omni.InfraProviderLabelPrefixFormat, infraProviderID) + "c" - _, cOk := res.Metadata().Labels().Get(cLabel) - assertion.True(cOk) - }) - } -} - -// AssertInfraMachinesAreAllocated asserts that the machines that belong to the given cluster and managed by a static infra provider -// are marked as allocated in the related resources. 
-func AssertInfraMachinesAreAllocated(testCtx context.Context, omniState state.State, clusterID, talosVersion string, extensions []string) TestFunc { - return func(t *testing.T) { - ctx, cancel := context.WithTimeout(testCtx, time.Minute*10) - defer cancel() - - nodeList, err := safe.StateListAll[*omni.MachineSetNode](ctx, omniState, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterID))) - require.NoError(t, err) - - require.Greater(t, nodeList.Len(), 0) - - for machineSetNode := range nodeList.All() { - id := machineSetNode.Metadata().ID() - - // There must be an infra.Machine resource for each node - rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) { - assertion.Equal(talosVersion, res.TypedSpec().Value.ClusterTalosVersion) - assertion.Empty(res.TypedSpec().Value.WipeId) - assertion.Equal(extensions, res.TypedSpec().Value.Extensions) - }) - - // The machine is allocated, so it will be powered on and be ready to use - rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) { - assertion.Equal(specs.InfraMachineStatusSpec_POWER_STATE_ON, res.TypedSpec().Value.PowerState) - assertion.True(res.TypedSpec().Value.ReadyToUse) - assertion.True(res.TypedSpec().Value.Installed) - }) - } - } -} - -// DestroyInfraMachines removes siderolink.Link resources for all machines managed by a static infra provider, -// and asserts that the related infra.Machine and infra.MachineStatus resources are deleted. -func DestroyInfraMachines(testCtx context.Context, omniState state.State, providerID string, count int) TestFunc { - return func(t *testing.T) { - ctx, cancel := context.WithTimeout(testCtx, time.Minute*10) - defer cancel() - - links, err := safe.StateListAll[*siderolink.Link](ctx, omniState) - require.NoError(t, err) - - var deleted int - - for link := range links.All() { - pid, ok := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) - if !ok { - continue - } - - if pid != providerID { - continue - } - - id := link.Metadata().ID() - - rtestutils.Destroy[*siderolink.Link](ctx, t, omniState, []string{id}) - - rtestutils.AssertNoResource[*infra.Machine](ctx, t, omniState, id) - rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id) - - deleted++ - } - - require.EqualValues(t, count, deleted) - } -} diff --git a/cmd/integration-test/pkg/tests/stats.go b/cmd/integration-test/pkg/tests/stats.go deleted file mode 100644 index 8f1d35f4..00000000 --- a/cmd/integration-test/pkg/tests/stats.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. - -package tests - -import ( - "context" - "errors" - "fmt" - "sort" - "strings" - "testing" - "time" - - "github.com/prometheus/client_golang/api" - v1 "github.com/prometheus/client_golang/api/prometheus/v1" - "github.com/prometheus/common/model" - "github.com/siderolabs/go-retry/retry" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// AssertStatsLimits checks that metrics don't show any spikes of resource reads/writes, controller wakeups. -// This test should only be run after the integration tests set with Talemu enabled as the thresholds are adjusted for it. -// Should have Prometheus running on 9090. 
-func AssertStatsLimits(testCtx context.Context) TestFunc { - return func(t *testing.T) { - for _, tt := range []struct { - check func(assert *assert.Assertions, value float64) - name string - query string - }{ - { - name: "resource CRUD", - query: `sum(omni_resource_operations_total{operation=~"create|update", type!="MachineStatusLinks.omni.sidero.dev"})`, - check: func(assert *assert.Assertions, value float64) { - limit := float64(12000) - - assert.Lessf(value, limit, "resource CRUD operations were expected to be less than %f. "+ - "If the limit is exceeded not because of a leak but because you added some new resources/controllers, adjust the limit accordingly.", limit) - }, - }, - { - name: "queue length", - query: `sum(omni_runtime_qcontroller_queue_length)`, - check: func(assert *assert.Assertions, value float64) { assert.Zero(value) }, - }, - { - name: "controller wakeups", - query: `sum(omni_runtime_controller_wakeups{controller!="MachineStatusLinkController"})`, - check: func(assert *assert.Assertions, value float64) { - limit := float64(12000) - - assert.Lessf(value, limit, "controller wakeups were expected to be less than %f. "+ - "If the limit is exceeded not because of a leak but because you added some new resources/controllers, adjust the limit accordingly.", limit) - }, - }, - } { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(testCtx, time.Second*16) - defer cancel() - - err := retry.Constant(time.Second * 15).Retry(func() error { - promClient, err := api.NewClient(api.Config{ - Address: "http://127.0.0.1:9090", - }) - if err != nil { - return retry.ExpectedError(err) - } - - var ( - value model.Value - warnings v1.Warnings - ) - - agg := assertionAggregator{} - - v1api := v1.NewAPI(promClient) - - value, warnings, err = v1api.Query(ctx, tt.query, time.Now()) - if err != nil { - return retry.ExpectedError(err) - } - - if len(warnings) > 0 { - return retry.ExpectedErrorf("prometheus query had warnings %#v", warnings) - } - - assert := assert.New(&agg) - - switch val := value.(type) { - case *model.Scalar: - tt.check(assert, float64(val.Value)) - case model.Vector: - tt.check(assert, float64(val[val.Len()-1].Value)) - default: - return fmt.Errorf("unexpected value type %s", val.Type()) - } - - if agg.hadErrors { - return retry.ExpectedError(errors.New(agg.String())) - } - - return nil - }) - - require.NoError(t, err) - }) - } - } -} - -type assertionAggregator struct { - errors map[string]struct{} - hadErrors bool -} - -func (agg *assertionAggregator) Errorf(format string, args ...any) { - errorString := fmt.Sprintf(format, args...) - - if agg.errors == nil { - agg.errors = map[string]struct{}{} - } - - agg.errors[errorString] = struct{}{} - agg.hadErrors = true -} - -func (agg *assertionAggregator) String() string { - lines := make([]string, 0, len(agg.errors)) - - for errorString := range agg.errors { - lines = append(lines, " * "+errorString) - } - - sort.Strings(lines) - - return strings.Join(lines, "\n") -} diff --git a/cmd/integration-test/pkg/tests/tests.go b/cmd/integration-test/pkg/tests/tests.go deleted file mode 100644 index 43b12113..00000000 --- a/cmd/integration-test/pkg/tests/tests.go +++ /dev/null @@ -1,1740 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. - -// Package tests provides the Omni tests. 
-package tests - -import ( - "context" - "fmt" - "log" - "net/http" - "regexp" - "strings" - "testing" - "time" - - "github.com/cosi-project/runtime/pkg/safe" - "github.com/cosi-project/runtime/pkg/state" - "github.com/siderolabs/gen/xslices" - "github.com/stretchr/testify/assert" - "golang.org/x/sync/semaphore" - "google.golang.org/protobuf/types/known/durationpb" - - "github.com/siderolabs/omni/client/api/omni/specs" - "github.com/siderolabs/omni/client/pkg/constants" - "github.com/siderolabs/omni/client/pkg/omni/resources/omni" - "github.com/siderolabs/omni/client/pkg/omni/resources/siderolink" - "github.com/siderolabs/omni/cmd/integration-test/pkg/clientconfig" -) - -// TestFunc is a testing function prototype. -type TestFunc func(t *testing.T) - -// RestartAMachineFunc is a function to restart a machine by UUID. -type RestartAMachineFunc func(ctx context.Context, uuid string) error - -// WipeAMachineFunc is a function to wipe a machine by UUID. -type WipeAMachineFunc func(ctx context.Context, uuid string) error - -// FreezeAMachineFunc is a function to freeze a machine by UUID. -type FreezeAMachineFunc func(ctx context.Context, uuid string) error - -// HTTPRequestSignerFunc is function to sign the HTTP request. -type HTTPRequestSignerFunc func(ctx context.Context, req *http.Request) error - -// TalosAPIKeyPrepareFunc is a function to prepare a public key for Talos API auth. -type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error - -// Options for the test runner. -// -//nolint:govet -type Options struct { - RunTestPattern string - - CleanupLinks bool - SkipExtensionsCheckOnCreate bool - RunStatsCheck bool - ExpectedMachines int - - RestartAMachineFunc RestartAMachineFunc - WipeAMachineFunc WipeAMachineFunc - FreezeAMachineFunc FreezeAMachineFunc - ProvisionConfigs []MachineProvisionConfig - - MachineOptions MachineOptions - - HTTPEndpoint string - AnotherTalosVersion string - AnotherKubernetesVersion string - OmnictlPath string - ScalingTimeout time.Duration - StaticInfraProvider string - OutputDir string -} - -func (o Options) defaultInfraProvider() string { - if len(o.ProvisionConfigs) == 0 { - return "" - } - - return o.ProvisionConfigs[0].Provider.ID -} - -func (o Options) defaultProviderData() string { - if len(o.ProvisionConfigs) == 0 { - return "{}" - } - - return o.ProvisionConfigs[0].Provider.Data -} - -func (o Options) provisionMachines() bool { - var totalMachineCount int - - for _, cfg := range o.ProvisionConfigs { - totalMachineCount += cfg.MachineCount - } - - return totalMachineCount > 0 -} - -// MachineProvisionConfig tells the test to provision machines from the infra provider. -type MachineProvisionConfig struct { - Provider MachineProviderConfig `yaml:"provider"` - MachineCount int `yaml:"count"` -} - -// MachineProviderConfig keeps the configuration of the infra provider for the machine provision config. -type MachineProviderConfig struct { - ID string `yaml:"id"` - Data string `yaml:"data"` - Static bool `yaml:"static"` -} - -// Run the integration tests. 
-// -//nolint:maintidx,gocyclo,cyclop,gocognit -func Run(ctx context.Context, clientConfig *clientconfig.ClientConfig, options Options) error { - rootClient, err := clientConfig.GetClient(ctx) - if err != nil { - return err - } - - defer rootClient.Close() //nolint:errcheck - - talosAPIKeyPrepare := func(ctx context.Context, contextName string) error { - return clientconfig.TalosAPIKeyPrepare(ctx, rootClient, contextName) - } - - if !constants.IsDebugBuild { - // noop for non-debug builds - talosAPIKeyPrepare = func(context.Context, string) error { - return nil - } - } - - testList := []testGroup{ - { - Name: "CleanState", - Description: ` -Bring the state of Omni to a clean state by removing all clusters, config patches, etc. which might have been left from previous runs. -Wait for all expected machines to join and be in maintenance mode.`, - Parallel: false, // these tests should run first without other tests interfering - Subtests: []subTest{ - { - "DestroyAllClusterRelatedResources", - DestroyAllClusterRelatedResources(ctx, rootClient.Omni().State()), - }, - // machine discovery, all machines should be in maintenance mode - { - "LinkCountShouldMatchExpectedMachines", - AssertNumberOfLinks(ctx, rootClient.Omni().State(), options.ExpectedMachines), - }, - { - "LinksShouldBeConnected", - AssertLinksConnected(ctx, rootClient.Omni().State()), - }, - { - "LinksShouldMatchMachines", - AssertMachinesMatchLinks(ctx, rootClient.Omni().State()), - }, - { - "MachinesShouldHaveLogs", - AssertMachinesHaveLogs(ctx, rootClient.Omni().State(), rootClient.Management()), - }, - { - "MachinesShouldBeReachableInMaintenanceMode", - AssertTalosMaintenanceAPIAccessViaOmni(ctx, rootClient, talosAPIKeyPrepare), - }, - { - "MachinesShouldBeInMaintenanceMode", - AssertMachineStatus(ctx, rootClient.Omni().State(), true, "", map[string]string{ - omni.MachineStatusLabelConnected: "", - omni.MachineStatusLabelReportingEvents: "", - omni.MachineStatusLabelAvailable: "", - // QEMU-specific labels which should always match, others are specific to the settings (number of cores, etc.) - omni.MachineStatusLabelCPU: "qemu", - omni.MachineStatusLabelArch: "amd64", - omni.MachineStatusLabelPlatform: "metal", - }, nil), - }, - }, - }, - { - Name: "TalosImageGeneration", - Description: ` -Generate various Talos images with Omni and try to download them.`, - Parallel: true, - Subtests: []subTest{ - { - "TalosImagesShouldBeDownloadableUsingCLI", - AssertDownloadUsingCLI(ctx, rootClient, options.OmnictlPath, options.HTTPEndpoint), - }, - { - "TalosImagesShouldBeDownloadable", - AssertSomeImagesAreDownloadable(ctx, rootClient, func(ctx context.Context, req *http.Request) error { - return clientconfig.SignHTTPRequest(ctx, rootClient, req) - }, options.HTTPEndpoint), - }, - }, - }, - { - Name: "CLICommands", - Description: ` -Verify various omnictl commands.`, - Parallel: true, - Subtests: []subTest{ - { - "OmnictlUserCLIShouldWork", - AssertUserCLI(ctx, rootClient, options.OmnictlPath, options.HTTPEndpoint), - }, - }, - }, - { - Name: "KubernetesNodeAudit", - Description: "Test the auditing of the Kubernetes nodes, i.e. 
when a node is gone from the Omni perspective but still exists on the Kubernetes cluster.", - Parallel: true, - MachineClaim: 2, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-k8s-node-audit", - ControlPlanes: 1, - Workers: 1, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-k8s-node-audit", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "KubernetesNodeAuditShouldBePerformed", - AssertKubernetesNodeAudit(ctx, rootClient.Omni().State(), "integration-k8s-node-audit", rootClient, options), - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-k8s-node-audit", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-k8s-node-audit"), - }, - { - Name: "ForcedMachineRemoval", - Description: ` -Tests different scenarios for forced Machine removal (vs. graceful removing from a cluster): - -- force remove a Machine which is not allocated (not part of any cluster) -- force remove a worker Machine which is part of the cluster -- force remove a control plane Machine which is part of the cluster, and replace with a new Machine. - -These tests simulate a hardware failure of a Machine which requires a forced removal from Omni. - -In the tests, we wipe and reboot the VMs to bring them back as available for the next test.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - // this test will force-remove a machine, but it will bring it back, so pool of available will be still 4 machines - subTest{ - "UnallocatedMachinesShouldBeDestroyable", - AssertUnallocatedMachineDestroyFlow(ctx, rootClient.Omni().State(), options.RestartAMachineFunc), - }, - // this test consumes all 4 available machines and creates a cluster - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-forced-removal", - ControlPlanes: 3, - Workers: 1, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterShouldBeReady(ctx, rootClient, "integration-forced-removal", options.MachineOptions.TalosVersion, talosAPIKeyPrepare)..., - ).Append( - // this test will force-remove a worker, so the cluster will be 3+0, and 1 available machine - subTest{ - "WorkerNodesShouldBeForceRemovable", - AssertForceRemoveWorkerNode(ctx, rootClient.Omni().State(), "integration-forced-removal", options.FreezeAMachineFunc, options.WipeAMachineFunc), - }, - ).Append( - TestBlockClusterShouldBeReady(ctx, rootClient, "integration-forced-removal", options.MachineOptions.TalosVersion, talosAPIKeyPrepare)..., - ).Append( - // this test will add an available machine as a fourth control plane node, but then remove a frozen one, so the cluster is 3+0, and 1 available machine - subTest{ - "ControlPlaneNodeShouldBeForceReplaceable", - AssertControlPlaneForceReplaceMachine(ctx, rootClient.Omni().State(), "integration-forced-removal", options), - }, - ).Append( - TestBlockClusterShouldBeReady(ctx, rootClient, "integration-forced-removal", options.MachineOptions.TalosVersion, 
talosAPIKeyPrepare)..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-forced-removal", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-forced-removal"), - }, - { - Name: "ImmediateClusterDestruction", - Description: ` -Regression test: create a cluster and destroy it without waiting for the cluster to reach any state.`, - Parallel: true, - MachineClaim: 3, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-immediate", - ControlPlanes: 1, - Workers: 2, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - subTest{ - "ClusterShouldBeDestroyedImmediately", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-immediate", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-immediate"), - }, - TestGroupClusterCreateAndReady( - ctx, - rootClient, - talosAPIKeyPrepare, - "default", - ` -Create a regular 3 + 2 cluster with HA controlplane, assert that the cluster is ready and accessible. -Don't do any changes to the cluster.`, - ClusterOptions{ - ControlPlanes: 3, - Workers: 2, - - MachineOptions: options.MachineOptions, - }, - options.OutputDir, - ), - TestGroupClusterCreateAndReady( - ctx, - rootClient, - talosAPIKeyPrepare, - "encrypted", - ` -Create a 1 + 1 cluster and enable disk encryption via Omni as a KMS. -Don't do any changes to the cluster.`, - ClusterOptions{ - ControlPlanes: 1, - Workers: 1, - - MachineOptions: options.MachineOptions, - Features: &specs.ClusterSpec_Features{ - DiskEncryption: true, - }, - }, - options.OutputDir, - ), - TestGroupClusterCreateAndReady( - ctx, - rootClient, - talosAPIKeyPrepare, - "singlenode", - ` -Create a single node cluster. 
-Don't do any changes to the cluster.`, - ClusterOptions{ - ControlPlanes: 1, - Workers: 0, - - MachineOptions: options.MachineOptions, - }, - options.OutputDir, - ), - { - Name: "ScaleUpAndDown", - Description: ` -Tests scaling up and down a cluster: - -- create a 1+0 cluster -- scale up to 1+1 -- scale up to 3+1 -- scale down to 3+0 -- scale down to 1+0 - -In between the scaling operations, assert that the cluster is ready and accessible.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-scaling", - ControlPlanes: 1, - Workers: 0, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeAdded", - ScaleClusterUp(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling", - ControlPlanes: 0, - Workers: 1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeAdded", - ScaleClusterUp(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling", - ControlPlanes: 2, - Workers: 0, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeRemoved", - ScaleClusterDown(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling", - ControlPlanes: 0, - Workers: -1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeRemoved", - ScaleClusterDown(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling", - ControlPlanes: -2, - Workers: 0, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-scaling", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-scaling"), - }, - { - Name: "ScaleUpAndDownMachineClassBasedMachineSets", - Description: ` -Tests scaling up and down a cluster using machine classes: - -- create a 1+0 cluster -- scale up to 1+1 -- scale up to 3+1 -- scale down to 3+0 -- 
scale down to 1+0 - -In between the scaling operations, assert that the cluster is ready and accessible.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateClusterWithMachineClass(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-machine-class-based-machine-sets", - ControlPlanes: 1, - Workers: 0, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-machine-class-based-machine-sets", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeAdded", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-machine-class-based-machine-sets", - ControlPlanes: 0, - Workers: 1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-machine-class-based-machine-sets", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeAdded", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-machine-class-based-machine-sets", - ControlPlanes: 2, - Workers: 0, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-machine-class-based-machine-sets", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeRemoved", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-machine-class-based-machine-sets", - ControlPlanes: 0, - Workers: -1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-machine-class-based-machine-sets", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeRemoved", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-machine-class-based-machine-sets", - ControlPlanes: -2, - Workers: 0, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-machine-class-based-machine-sets", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-scaling-machine-class-based-machine-sets", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-scaling-machine-class-based-machine-sets"), - }, - { - Name: "ScaleUpAndDownAutoProvisionMachineSets", - Description: ` -Tests scaling up and down a 
cluster using infrastructure provisioner: - -- create a 1+0 cluster -- scale up to 1+1 -- scale up to 3+1 -- scale down to 3+0 -- scale down to 1+0 - -In between the scaling operations, assert that the cluster is ready and accessible.`, - Parallel: true, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateClusterWithMachineClass(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-auto-provision", - ControlPlanes: 1, - Workers: 0, - InfraProvider: options.defaultInfraProvider(), - - MachineOptions: options.MachineOptions, - ProviderData: options.defaultProviderData(), - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-auto-provision", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeAdded", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-auto-provision", - ControlPlanes: 0, - Workers: 1, - InfraProvider: options.defaultInfraProvider(), - MachineOptions: options.MachineOptions, - ProviderData: options.defaultProviderData(), - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-auto-provision", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeAdded", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-auto-provision", - ControlPlanes: 2, - Workers: 0, - MachineOptions: options.MachineOptions, - ProviderData: options.defaultInfraProvider(), - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-auto-provision", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "OneWorkerShouldBeRemoved", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-auto-provision", - ControlPlanes: 0, - Workers: -1, - InfraProvider: options.defaultInfraProvider(), - MachineOptions: options.MachineOptions, - ProviderData: options.defaultProviderData(), - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-auto-provision", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "TwoControlPlanesShouldBeRemoved", - ScaleClusterMachineSets(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-scaling-auto-provision", - ControlPlanes: -2, - Workers: 0, - InfraProvider: options.defaultInfraProvider(), - MachineOptions: options.MachineOptions, - ProviderData: options.defaultProviderData(), - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-scaling-auto-provision", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, 
rootClient.Omni().State(), "integration-scaling-auto-provision", true, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-scaling-auto-provision"), - }, - { - Name: "RollingUpdateParallelism", - Description: ` -Tests rolling update & scale down strategies for concurrency control for worker machine sets. - -- create a 1+3 cluster -- update the worker configs with rolling strategy using maxParallelism of 2 -- scale down the workers to 0 with rolling strategy using maxParallelism of 2 -- assert that the maxParallelism of 2 was respected and used in both operations,`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-rolling-update-parallelism", - ControlPlanes: 1, - Workers: 3, - - MachineOptions: options.MachineOptions, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-rolling-update-parallelism", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "WorkersUpdateShouldBeRolledOutWithMaxParallelism", - AssertWorkerNodesRollingConfigUpdate(ctx, rootClient, "integration-rolling-update-parallelism", 2), - }, - subTest{ - "WorkersShouldScaleDownWithMaxParallelism", - AssertWorkerNodesRollingScaleDown(ctx, rootClient, "integration-rolling-update-parallelism", 2), - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-rolling-update-parallelism", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-rolling-update-parallelism"), - }, - { - Name: "ReplaceControlPlanes", - Description: ` -Tests replacing control plane nodes: - -- create a 1+0 cluster -- scale up to 2+0, and immediately remove the first control plane node - -In between the scaling operations, assert that the cluster is ready and accessible.`, - Parallel: true, - MachineClaim: 2, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-replace-cp", - ControlPlanes: 1, - Workers: 0, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-replace-cp", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ControlPlanesShouldBeReplaced", - ReplaceControlPlanes(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-replace-cp", - - MachineOptions: options.MachineOptions, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-replace-cp", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-replace-cp", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-replace-cp"), - }, - { - Name: "ConfigPatching", - Description: ` -Tests applying various config patching, including "broken" config 
patches which should not apply.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-config-patching", - ControlPlanes: 3, - Workers: 1, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-config-patching", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "LargeImmediateConfigPatchShouldBeAppliedAndRemoved", - AssertLargeImmediateConfigApplied(ctx, rootClient, "integration-config-patching", talosAPIKeyPrepare), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-config-patching", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "MachineSetConfigPatchShouldBeAppliedAndRemoved", - AssertConfigPatchMachineSet(ctx, rootClient, "integration-config-patching"), - }, - subTest{ - "SingleClusterMachineConfigPatchShouldBeAppliedAndRemoved", - AssertConfigPatchSingleClusterMachine(ctx, rootClient, "integration-config-patching"), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-config-patching", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ConfigPatchWithRebootShouldBeApplied", - AssertConfigPatchWithReboot(ctx, rootClient, "integration-config-patching", talosAPIKeyPrepare), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-config-patching", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "InvalidConfigPatchShouldNotBeApplied", - AssertConfigPatchWithInvalidConfig(ctx, rootClient, "integration-config-patching", talosAPIKeyPrepare), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-config-patching", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-config-patching", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-config-patching"), - }, - { - Name: "TalosUpgrades", - Description: ` -Tests upgrading Talos version, including reverting a failed upgrade.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-talos-upgrade", - ControlPlanes: 3, - Workers: 1, - - MachineOptions: MachineOptions{ - TalosVersion: options.AnotherTalosVersion, - KubernetesVersion: options.AnotherKubernetesVersion, // use older Kubernetes compatible with AnotherTalosVersion - }, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-talos-upgrade", - options.AnotherTalosVersion, - 
options.AnotherKubernetesVersion, - talosAPIKeyPrepare, - )..., - ).AppendIf( - !options.SkipExtensionsCheckOnCreate, - subTest{ - "HelloWorldServiceExtensionShouldBePresent", - AssertExtensionIsPresent(ctx, rootClient, "integration-talos-upgrade", HelloWorldServiceExtensionName), - }, - ).Append( - subTest{ - "TalosSchematicUpdateShouldSucceed", - AssertTalosSchematicUpdateFlow(ctx, rootClient, "integration-talos-upgrade"), - }, - subTest{ - "QemuGuestAgentExtensionShouldBePresent", - AssertExtensionIsPresent(ctx, rootClient, "integration-talos-upgrade", QemuGuestAgentExtensionName), - }, - subTest{ - "ClusterBootstrapManifestSyncShouldBeSuccessful", - KubernetesBootstrapManifestSync(ctx, rootClient.Management(), "integration-talos-upgrade"), - }, - ).Append( - subTest{ - "TalosUpgradeShouldSucceed", - AssertTalosUpgradeFlow(ctx, rootClient.Omni().State(), "integration-talos-upgrade", options.MachineOptions.TalosVersion), - }, - subTest{ - "ClusterBootstrapManifestSyncShouldBeSuccessful", - KubernetesBootstrapManifestSync(ctx, rootClient.Management(), "integration-talos-upgrade"), - }, - ).AppendIf( - !options.SkipExtensionsCheckOnCreate, - subTest{ - "HelloWorldServiceExtensionShouldBePresent", - AssertExtensionIsPresent(ctx, rootClient, "integration-talos-upgrade", HelloWorldServiceExtensionName), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-talos-upgrade", - options.MachineOptions.TalosVersion, - options.AnotherKubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "FailedTalosUpgradeShouldBeRevertible", - AssertTalosUpgradeIsRevertible(ctx, rootClient.Omni().State(), "integration-talos-upgrade", options.MachineOptions.TalosVersion), - }, - ).Append( - subTest{ - "RunningTalosUpgradeShouldBeCancelable", - AssertTalosUpgradeIsCancelable(ctx, rootClient.Omni().State(), "integration-talos-upgrade", options.MachineOptions.TalosVersion, options.AnotherTalosVersion), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-talos-upgrade", - options.MachineOptions.TalosVersion, - options.AnotherKubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "MaintenanceTestConfigShouldStillBePresent", - AssertMaintenanceTestConfigIsPresent(ctx, rootClient.Omni().State(), "integration-talos-upgrade", 0), // check the maintenance config in the first machine - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-talos-upgrade", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-talos-upgrade"), - }, - { - Name: "KubernetesUpgrades", - Description: ` -Tests upgrading Kubernetes version, including reverting a failed upgrade.`, - Parallel: true, - MachineClaim: 4, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-k8s-upgrade", - ControlPlanes: 3, - Workers: 1, - - MachineOptions: MachineOptions{ - TalosVersion: options.MachineOptions.TalosVersion, - KubernetesVersion: options.AnotherKubernetesVersion, - }, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-k8s-upgrade", - options.MachineOptions.TalosVersion, - options.AnotherKubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - 
subTest{ - "KubernetesUpgradeShouldSucceed", - AssertKubernetesUpgradeFlow( - ctx, rootClient.Omni().State(), rootClient.Management(), - "integration-k8s-upgrade", - options.MachineOptions.KubernetesVersion, - ), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-k8s-upgrade", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "FailedKubernetesUpgradeShouldBeRevertible", - AssertKubernetesUpgradeIsRevertible(ctx, rootClient.Omni().State(), "integration-k8s-upgrade", options.MachineOptions.KubernetesVersion), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-k8s-upgrade", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-k8s-upgrade", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-k8s-upgrade"), - }, - { - Name: "EtcdBackupAndRestore", - Description: ` -Tests automatic & manual backup & restore for workload etcd. - -Automatic backups are enabled, done, and then a manual backup is created. -Afterwards, a cluster's control plane is destroyed then recovered from the backup. - -Finally, a completely new cluster is created using the same backup to test the "point-in-time recovery".`, - Parallel: true, - MachineClaim: 6, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-etcd-backup", - ControlPlanes: 3, - Workers: 1, - - EtcdBackup: &specs.EtcdBackupConf{ - Interval: durationpb.New(2 * time.Hour), - Enabled: true, - }, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-etcd-backup", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - TestBlockKubernetesDeploymentCreateAndRunning(ctx, rootClient.Management(), "integration-etcd-backup", - "default", - "test", - )..., - ).Append( - subTest{ - "KubernetesSecretShouldBeCreated", - AssertKubernetesSecretIsCreated(ctx, rootClient.Management(), "integration-etcd-backup", "default", "test", "backup-test-secret-val"), - }, - subTest{ - "EtcdAutomaticBackupShouldBeCreated", - AssertEtcdAutomaticBackupIsCreated(ctx, rootClient.Omni().State(), "integration-etcd-backup"), - }, - subTest{ - "EtcdManualBackupShouldBeCreated", - AssertEtcdManualBackupIsCreated(ctx, rootClient.Omni().State(), "integration-etcd-backup"), - }, - ).Append( - TestBlockCreateClusterFromEtcdBackup(ctx, rootClient, talosAPIKeyPrepare, options, - "integration-etcd-backup", - "integration-etcd-backup-new-cluster", - "default", - "test", - )..., - ).Append( - subTest{ - "EtcdSecretShouldBeSameAfterCreateFromBackup", - AssertKubernetesSecretHasValue(ctx, rootClient.Management(), "integration-etcd-backup-new-cluster", "default", "test", "backup-test-secret-val"), - }, - subTest{ - "NewClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-etcd-backup-new-cluster", false, false), - }, - ).Append( - TestBlockRestoreEtcdFromLatestBackup(ctx, rootClient, 
talosAPIKeyPrepare, options, 3, - "integration-etcd-backup", - "default", - "test", - )..., - ).Append( - subTest{ - "RestoredClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-etcd-backup", false, false), - }, - ), - Finalizer: func(t *testing.T) { - DestroyCluster(ctx, rootClient, options.OutputDir, "integration-etcd-backup")(t) - DestroyCluster(ctx, rootClient, options.OutputDir, "integration-etcd-backup-new-cluster")(t) - }, - }, - { - Name: "MaintenanceUpgrade", - Description: ` - Test upgrading (downgrading) a machine in maintenance mode. - - Create a cluster out of a single machine on version1, remove cluster (the machine will stay on version1, Talos is installed). - Create a cluster out of the same machine on version2, Omni should upgrade the machine to version2 while in maintenance. - `, - Parallel: true, - MachineClaim: 1, - Subtests: subTests( - subTest{ - "MachineShouldBeUpgradedInMaintenanceMode", - AssertMachineShouldBeUpgradedInMaintenanceMode( - ctx, rootClient, - "integration-maintenance-upgrade", - options.AnotherKubernetesVersion, - options.MachineOptions.TalosVersion, - options.AnotherTalosVersion, - talosAPIKeyPrepare, - ), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-maintenance-upgrade"), - }, - { - Name: "Auth", - Description: ` -Test authorization on accessing Omni API, some tests run without a cluster, some only run with a context of a cluster.`, - MachineClaim: 1, - Parallel: true, - Subtests: subTests( - subTest{ - "AnonymousRequestShouldBeDenied", - AssertAnonymousAuthenication(ctx, rootClient), - }, - subTest{ - "InvalidSignatureShouldBeDenied", - AssertAPIInvalidSignature(ctx, rootClient), - }, - subTest{ - "PublicKeyWithoutLifetimeShouldNotBeRegistered", - AssertPublicKeyWithoutLifetimeNotRegistered(ctx, rootClient), - }, - subTest{ - "PublicKeyWithLongLifetimeShouldNotBeRegistered", - AssertPublicKeyWithLongLifetimeNotRegistered(ctx, rootClient), - }, - subTest{ - "OmniconfigShouldBeDownloadable", - AssertOmniconfigDownload(ctx, rootClient), - }, - subTest{ - "PublicKeyWithUnknownEmailShouldNotBeRegistered", - AssertRegisterPublicKeyWithUnknownEmail(ctx, rootClient), - }, - subTest{ - "ServiceAccountAPIShouldWork", - AssertServiceAccountAPIFlow(ctx, rootClient), - }, - subTest{ - "ResourceAuthzShouldWork", - AssertResourceAuthz(ctx, rootClient, clientConfig), - }, - subTest{ - "ResourceAuthzWithACLShouldWork", - AssertResourceAuthzWithACL(ctx, rootClient, clientConfig), - }, - ).Append( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-auth", - ControlPlanes: 1, - Workers: 0, - - Features: &specs.ClusterSpec_Features{ - UseEmbeddedDiscoveryService: true, - }, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-auth", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "APIAuthorizationShouldBeTested", - AssertAPIAuthz(ctx, rootClient, clientConfig, "integration-auth"), - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-auth", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-auth"), - }, - { - 
Name: "ClusterTemplate", - Description: ` -Test flow of cluster creation and scaling using cluster templates.`, - Parallel: true, - MachineClaim: 5, - Subtests: []subTest{ - { - "TestClusterTemplateFlow", - AssertClusterTemplateFlow(ctx, rootClient.Omni().State(), options.MachineOptions), - }, - }, - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "tmpl-cluster"), - }, - { - Name: "WorkloadProxy", - Description: "Test workload service proxying feature", - Parallel: true, - MachineClaim: 1, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-workload-proxy", - ControlPlanes: 1, - Workers: 0, - - Features: &specs.ClusterSpec_Features{ - EnableWorkloadProxy: true, - }, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, - }, - ), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-workload-proxy", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "WorkloadProxyShouldBeTested", - AssertWorkloadProxy(ctx, rootClient, "integration-workload-proxy"), - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-workload-proxy", false, false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-workload-proxy"), - }, - { - Name: "StaticInfraProvider", - Description: ` -Tests common Omni operations on machines created by a static infrastructure provider:, -Note: this test expects all machines to be provisioned by the bare-metal infra provider as it doesn't filter them. 
- -- create a 1+0 cluster - assert that cluster is healthy and ready -- scale it up to be 3+1 - assert that cluster is healthy and ready -- assert that machines are not ready to use (occupied) -- scale it down to be 1+0 - assert that cluster is healthy and ready -- destroy the cluster - assert that machines are wiped, then marked as ready to use -- create a new 3+1 cluster -- assert that cluster is healthy and ready -- remove links of the machines -`, - Parallel: true, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-static-infra-provider", - ControlPlanes: 1, - Workers: 0, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: true, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-static-infra-provider", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeScaledUp", - ScaleClusterUp(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-static-infra-provider", - ControlPlanes: 2, - Workers: 1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-static-infra-provider", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ExtensionsShouldBeUpdated", - UpdateExtensions(ctx, rootClient, "integration-static-infra-provider", []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}), - }, - subTest{ - "MachinesShouldBeAllocated", - AssertInfraMachinesAreAllocated(ctx, rootClient.Omni().State(), "integration-static-infra-provider", - options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}), - }, - ).Append( - subTest{ - "ClusterShouldBeScaledDown", - ScaleClusterDown(ctx, rootClient.Omni().State(), ClusterOptions{ - Name: "integration-static-infra-provider", - ControlPlanes: -2, - Workers: -1, - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-static-infra-provider", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false, true), - }, - ).Append( - subTest{ - "ClusterShouldBeRecreated", - CreateCluster(ctx, rootClient, ClusterOptions{ - Name: "integration-static-infra-provider", - ControlPlanes: 3, - Workers: 1, - - MachineOptions: options.MachineOptions, - ScalingTimeout: options.ScalingTimeout, - - SkipExtensionCheckOnCreate: true, - }), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady( - ctx, rootClient, - "integration-static-infra-provider", - options.MachineOptions.TalosVersion, - options.MachineOptions.KubernetesVersion, - talosAPIKeyPrepare, - )..., - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false, true), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, options.OutputDir, "integration-static-infra-provider"), - }, - } - - var re 
*regexp.Regexp - - if options.RunTestPattern != "" { - if re, err = regexp.Compile(options.RunTestPattern); err != nil { - log.Printf("run test pattern parse error: %s", err) - - return err - } - } - - var testsToRun []testGroup - - for _, group := range testList { - if re == nil || re.MatchString(group.Name) { - testsToRun = append(testsToRun, group) - - continue - } - - matchedGroup := group - matchedGroup.Subtests = xslices.Filter(matchedGroup.Subtests, func(test subTest) bool { - fullName := fmt.Sprintf("%s/%s", group.Name, test.Name) - - return re.MatchString(fullName) - }) - - if len(matchedGroup.Subtests) > 0 { - testsToRun = append(testsToRun, matchedGroup) - } - } - - for _, group := range testsToRun { - if group.MachineClaim > options.ExpectedMachines { - return fmt.Errorf("test group %q requires %d machines, but only %d are expected", group.Name, group.MachineClaim, options.ExpectedMachines) - } - } - - preRunTests := []testing.InternalTest{} - - if options.provisionMachines() { - for i, cfg := range options.ProvisionConfigs { - if cfg.Provider.Static { - preRunTests = append(preRunTests, testing.InternalTest{ - Name: "AcceptMachines", - F: AcceptInfraMachines(ctx, rootClient.Omni().State(), cfg.Provider.ID, cfg.MachineCount, true), // disable kexec to test full reboot over the provider - }) - - continue - } - - preRunTests = append(preRunTests, testing.InternalTest{ - Name: "AssertMachinesShouldBeProvisioned", - F: AssertMachinesShouldBeProvisioned(ctx, rootClient, cfg, fmt.Sprintf("provisioned%d", i), options.MachineOptions.TalosVersion), - }) - } - } - - if len(preRunTests) > 0 { - if err = runTests(preRunTests); err != nil { - return err - } - } - - machineSemaphore := semaphore.NewWeighted(int64(options.ExpectedMachines)) - - if err = runTests(makeTests(ctx, testsToRun, machineSemaphore)); err != nil { - return err - } - - postRunTests := []testing.InternalTest{} - - if options.provisionMachines() { - for i, cfg := range options.ProvisionConfigs { - if cfg.Provider.Static { - postRunTests = append(postRunTests, testing.InternalTest{ - Name: "InfraMachinesShouldBeDestroyed", - F: DestroyInfraMachines(ctx, rootClient.Omni().State(), cfg.Provider.ID, cfg.MachineCount), - }) - - continue - } - - postRunTests = append(postRunTests, testing.InternalTest{ - Name: "AssertMachinesShouldBeDeprovisioned", - F: AssertMachinesShouldBeDeprovisioned(ctx, rootClient, fmt.Sprintf("provisioned%d", i)), - }) - } - } - - if options.RunStatsCheck { - postRunTests = append(postRunTests, testing.InternalTest{ - Name: "AssertStatsLimits", - F: AssertStatsLimits(ctx), - }) - } - - if len(postRunTests) > 0 { - if err = runTests(postRunTests); err != nil { - return err - } - } - - if options.CleanupLinks { - if err := cleanupLinks(ctx, rootClient.Omni().State()); err != nil { - return err - } - } - - return nil -} - -func runTests(testsToRun []testing.InternalTest) error { - exitCode := testing.MainStart( - matchStringOnly(func(string, string) (bool, error) { return true, nil }), - testsToRun, - nil, - nil, - nil, - ).Run() - - if exitCode != 0 { - return fmt.Errorf("test failed") - } - - return nil -} - -func cleanupLinks(ctx context.Context, st state.State) error { - links, err := safe.ReaderListAll[*siderolink.Link](ctx, st) - if err != nil { - return err - } - - var cancel context.CancelFunc - - ctx, cancel = context.WithTimeout(ctx, time.Minute) - defer cancel() - - return links.ForEachErr(func(r *siderolink.Link) error { - err := st.TeardownAndDestroy(ctx, r.Metadata()) - if err != nil && 
!state.IsNotFoundError(err) { - return err - } - - return nil - }) -} - -func makeTests(ctx context.Context, testsToRun []testGroup, machineSemaphore *semaphore.Weighted, tests ...testing.InternalTest) []testing.InternalTest { - groups := xslices.Map(testsToRun, func(group testGroup) testing.InternalTest { - return testing.InternalTest{ - Name: group.Name, - F: func(t *testing.T) { - if group.Parallel { - t.Parallel() - } - - assert.NotEmpty(t, group.Name) - - t.Logf("[%s]:\n%s", group.Name, strings.TrimSpace(group.Description)) - - if group.MachineClaim > 0 { - t.Logf("attempting to acquire semaphore for %d machines", group.MachineClaim) - - if err := machineSemaphore.Acquire(ctx, int64(group.MachineClaim)); err != nil { - t.Fatalf("failed to acquire machine semaphore: %s", err) - } - - t.Logf("acquired semaphore for %d machines", group.MachineClaim) - - t.Cleanup(func() { - t.Logf("releasing semaphore for %d machines", group.MachineClaim) - - machineSemaphore.Release(int64(group.MachineClaim)) - }) - } - - var testGroupFailed bool - - for _, elem := range group.Subtests { - testGroupFailed = !t.Run(elem.Name, elem.F) - if testGroupFailed { - break - } - } - - if testGroupFailed && group.Finalizer != nil { - t.Logf("running finalizer, as the test group failed") - - group.Finalizer(t) - } - }, - } - }) - - return append(groups, tests...) -} - -//nolint:govet -type testGroup struct { - Name string - Description string - Parallel bool - MachineClaim int - Subtests []subTest - Finalizer func(t *testing.T) -} - -//nolint:govet -type subTest struct { - Name string - F func(t *testing.T) -} - -type subTestList []subTest - -func subTests(items ...subTest) subTestList { - return items -} - -func (l subTestList) Append(items ...subTest) subTestList { - return append(l, items...) -} - -func (l subTestList) AppendIf(condition bool, items ...subTest) subTestList { - if !condition { - return l - } - - return append(l, items...) -} diff --git a/cmd/integration-test/pkg/tests/utils.go b/cmd/integration-test/pkg/tests/utils.go deleted file mode 100644 index 157efea3..00000000 --- a/cmd/integration-test/pkg/tests/utils.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2025 Sidero Labs, Inc. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. 
- -package tests - -import ( - "errors" - "io" - "reflect" - "time" -) - -type corpusEntry = struct { - Parent string - Path string - Data []byte - Values []any - Generation int - IsSeed bool -} - -var errMain = errors.New("testing: unexpected use of func Main") - -type matchStringOnly func(pat, str string) (bool, error) - -func (f matchStringOnly) MatchString(pat, str string) (bool, error) { return f(pat, str) } - -func (f matchStringOnly) StartCPUProfile(io.Writer) error { return errMain } - -func (f matchStringOnly) StopCPUProfile() {} - -func (f matchStringOnly) WriteProfileTo(string, io.Writer, int) error { return errMain } - -func (f matchStringOnly) ImportPath() string { return "" } - -func (f matchStringOnly) StartTestLog(io.Writer) {} - -func (f matchStringOnly) StopTestLog() error { return errMain } - -func (f matchStringOnly) SetPanicOnExit0(bool) {} - -func (f matchStringOnly) CoordinateFuzzing(time.Duration, int64, time.Duration, int64, int, []corpusEntry, []reflect.Type, string, string) error { - return nil -} - -func (f matchStringOnly) RunFuzzWorker(func(corpusEntry) error) error { return nil } - -func (f matchStringOnly) ReadCorpus(string, []reflect.Type) ([]corpusEntry, error) { - return nil, nil -} - -func (f matchStringOnly) CheckCorpus([]any, []reflect.Type) error { return nil } - -func (f matchStringOnly) ResetCoverage() {} -func (f matchStringOnly) SnapshotCoverage() {} - -func (f matchStringOnly) InitRuntimeCoverage() (mode string, tearDown func(coverprofile string, gocoverdir string) (string, error), snapcov func() float64) { - return "", func(string, string) (string, error) { return "", nil }, func() float64 { return 0 } -} diff --git a/cmd/integration-test/make-cookies/main.go b/cmd/make-cookies/main.go similarity index 95% rename from cmd/integration-test/make-cookies/main.go rename to cmd/make-cookies/main.go index 6cb2718c..eef764a3 100644 --- a/cmd/integration-test/make-cookies/main.go +++ b/cmd/make-cookies/main.go @@ -12,8 +12,8 @@ import ( "net/http" "os" - "github.com/siderolabs/omni/cmd/integration-test/pkg/clientconfig" "github.com/siderolabs/omni/internal/backend/workloadproxy" + "github.com/siderolabs/omni/internal/pkg/clientconfig" ) func main() { diff --git a/frontend/package.json b/frontend/package.json index af48f174..d7f1b474 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -12,8 +12,8 @@ "dependencies": { "@auth0/auth0-vue": "^2.4.0", "@headlessui/vue": "^1.7.23", - "@jsonforms/vue": "^3.5.1", "@jsonforms/vue-vanilla": "^3.5.1", + "@jsonforms/vue": "^3.5.1", "@kubernetes/client-node": "^0.22.3", "apexcharts": "3.45.2", "click-outside-vue3": "^4.0.1", diff --git a/hack/test/integration.sh b/hack/test/integration.sh index 3d3dfd51..dc78a7a6 100755 --- a/hack/test/integration.sh +++ b/hack/test/integration.sh @@ -164,15 +164,17 @@ if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then SSL_CERT_DIR=hack/certs:/etc/ssl/certs \ ${ARTIFACTS}/integration-test-linux-amd64 \ - --endpoint https://my-instance.localhost:8099 \ - --talos-version=${TALOS_VERSION} \ - --omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \ - --expected-machines=30 \ - --provision-config-file=hack/test/provisionconfig.yaml \ - --output-dir="${TEST_OUTPUTS_DIR}" \ - --run-stats-check \ - -t 10m \ - -p 10 \ + --omni.endpoint https://my-instance.localhost:8099 \ + --omni.talos-version=${TALOS_VERSION} \ + --omni.omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \ + --omni.expected-machines=30 \ + --omni.provision-config-file=hack/test/provisionconfig.yaml \ + 
--omni.output-dir="${TEST_OUTPUTS_DIR}" \ + --omni.run-stats-check \ + --test.timeout 10m \ + --test.parallel 10 \ + --test.failfast \ + --test.v \ ${TALEMU_TEST_ARGS:-} docker rm -f "$PROMETHEUS_CONTAINER" @@ -272,10 +274,12 @@ sleep 5 SSL_CERT_DIR=hack/certs:/etc/ssl/certs \ ${ARTIFACTS}/integration-test-linux-amd64 \ - --endpoint https://my-instance.localhost:8099 \ - --talos-version=${TALOS_VERSION} \ - --omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \ - --expected-machines=8 `# equal to the masters+workers above` \ + --omni.endpoint https://my-instance.localhost:8099 \ + --omni.talos-version=${TALOS_VERSION} \ + --omni.omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \ + --omni.expected-machines=8 `# equal to the masters+workers above` \ + --test.failfast \ + --test.v \ ${INTEGRATION_TEST_ARGS:-} if [ "${INTEGRATION_RUN_E2E_TEST:-true}" == "true" ]; then diff --git a/cmd/integration-test/pkg/tests/auth.go b/internal/integration/auth_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/auth.go rename to internal/integration/auth_test.go index 197d00aa..c5029c53 100644 --- a/cmd/integration-test/pkg/tests/auth.go +++ b/internal/integration/auth_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" @@ -55,11 +57,11 @@ import ( "github.com/siderolabs/omni/client/pkg/omni/resources/siderolink" "github.com/siderolabs/omni/client/pkg/omni/resources/system" "github.com/siderolabs/omni/client/pkg/omni/resources/virtual" - "github.com/siderolabs/omni/cmd/integration-test/pkg/clientconfig" "github.com/siderolabs/omni/internal/backend/runtime/omni/infraprovider" "github.com/siderolabs/omni/internal/backend/runtime/omni/validated" "github.com/siderolabs/omni/internal/pkg/auth" "github.com/siderolabs/omni/internal/pkg/auth/role" + "github.com/siderolabs/omni/internal/pkg/clientconfig" "github.com/siderolabs/omni/internal/pkg/grpcutil" ) diff --git a/cmd/integration-test/pkg/tests/backup.go b/internal/integration/backup_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/backup.go rename to internal/integration/backup_test.go index 3d170919..575d403e 100644 --- a/cmd/integration-test/pkg/tests/backup.go +++ b/internal/integration/backup_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/blocks.go b/internal/integration/blocks_test.go similarity index 68% rename from cmd/integration-test/pkg/tests/blocks.go rename to internal/integration/blocks_test.go index 98f72a18..a7d7046e 100644 --- a/cmd/integration-test/pkg/tests/blocks.go +++ b/internal/integration/blocks_test.go @@ -3,11 +3,12 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" - "strings" "time" "github.com/cosi-project/runtime/pkg/resource" @@ -18,8 +19,8 @@ import ( "github.com/siderolabs/omni/client/pkg/omni/resources/omni" ) -// TestBlockClusterShouldBeReady is a reusable block of assertions that can be used to verify that a cluster is fully ready. 
-func TestBlockClusterShouldBeReady(ctx context.Context, rootClient *client.Client, clusterName,
+// AssertBlockClusterShouldBeReady is a reusable block of assertions that can be used to verify that a cluster is fully ready.
+func AssertBlockClusterShouldBeReady(ctx context.Context, rootClient *client.Client, clusterName,
 expectedTalosVersion string, talosAPIKeyPrepare TalosAPIKeyPrepareFunc,
 ) subTestList { //nolint:nolintlint,revive
 return subTestList{
@@ -63,8 +64,8 @@ func TestBlockClusterShouldBeReady(ctx context.Context, rootClient *client.Clien
 }
 }

-// TestBlockProxyAPIAccessShouldWork is a reusable block of assertions that can be used to verify that Omni API proxies work.
-func TestBlockProxyAPIAccessShouldWork(ctx context.Context, rootClient *client.Client, clusterName string, talosAPIKeyPrepare TalosAPIKeyPrepareFunc) []subTest { //nolint:nolintlint,revive
+// AssertBlockProxyAPIAccessShouldWork is a reusable block of assertions that can be used to verify that Omni API proxies work.
+func AssertBlockProxyAPIAccessShouldWork(ctx context.Context, rootClient *client.Client, clusterName string, talosAPIKeyPrepare TalosAPIKeyPrepareFunc) []subTest { //nolint:nolintlint,revive
 return []subTest{
 {
 "ClusterKubernetesAPIShouldBeAccessibleViaOmni",
@@ -77,18 +78,18 @@ func TestBlockProxyAPIAccessShouldWork(ctx context.Context, rootClient *client.C
 }
 }

-// TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady is a reusable block of assertions that can be used to verify
+// AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady is a reusable block of assertions that can be used to verify
 // that a cluster is fully ready and that Omni API proxies work, and Kubernetes version is correct, and Kubernetes usage
 // metrics were collected.
 //
-// This block is a bit slower than TestBlockClusterShouldBeReady, because it also verifies Kubernetes version.
-func TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
+// This block is a bit slower than AssertBlockClusterShouldBeReady, because it also verifies Kubernetes version.
+func AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
 ctx context.Context, rootClient *client.Client,
 clusterName, expectedTalosVersion, expectedKubernetesVersion string,
 talosAPIKeyPrepare TalosAPIKeyPrepareFunc,
 ) []subTest { //nolint:nolintlint,revive
- return TestBlockClusterShouldBeReady(ctx, rootClient, clusterName, expectedTalosVersion, talosAPIKeyPrepare).
- Append(TestBlockProxyAPIAccessShouldWork(ctx, rootClient, clusterName, talosAPIKeyPrepare)...).
+ return AssertBlockClusterShouldBeReady(ctx, rootClient, clusterName, expectedTalosVersion, talosAPIKeyPrepare).
+ Append(AssertBlockProxyAPIAccessShouldWork(ctx, rootClient, clusterName, talosAPIKeyPrepare)...).
 Append(
 subTest{
 "ClusterKubernetesVersionShouldBeCorrect",
@@ -107,9 +108,9 @@ func TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
 )
 }

-// TestBlockRestoreEtcdFromLatestBackup is a reusable block of assertions that can be used to verify that a
+// AssertBlockRestoreEtcdFromLatestBackup is a reusable block of assertions that can be used to verify that a
 // cluster's control plane can be broken, destroyed and then restored from an etcd backup.
-func TestBlockRestoreEtcdFromLatestBackup(ctx context.Context, rootClient *client.Client, talosAPIKeyPrepare TalosAPIKeyPrepareFunc, +func AssertBlockRestoreEtcdFromLatestBackup(ctx context.Context, rootClient *client.Client, talosAPIKeyPrepare TalosAPIKeyPrepareFunc, options Options, controlPlaneNodeCount int, clusterName, assertDeploymentNS, assertDeploymentName string, ) subTestList { //nolint:nolintlint,revive return subTestList{ @@ -161,13 +162,13 @@ func TestBlockRestoreEtcdFromLatestBackup(ctx context.Context, rootClient *clien AssertKubernetesDeploymentHasRunningPods(ctx, rootClient.Management(), clusterName, assertDeploymentNS, assertDeploymentName), }, ).Append( - TestBlockKubernetesDeploymentCreateAndRunning(ctx, rootClient.Management(), clusterName, assertDeploymentNS, assertDeploymentName+"-after-restore")..., + AssertBlockKubernetesDeploymentCreateAndRunning(ctx, rootClient.Management(), clusterName, assertDeploymentNS, assertDeploymentName+"-after-restore")..., ) } -// TestBlockCreateClusterFromEtcdBackup is a reusable block of assertions that can be used to verify that a +// AssertBlockCreateClusterFromEtcdBackup is a reusable block of assertions that can be used to verify that a // new cluster can be created from another cluster's etcd backup. -func TestBlockCreateClusterFromEtcdBackup(ctx context.Context, rootClient *client.Client, talosAPIKeyPrepare TalosAPIKeyPrepareFunc, options Options, +func AssertBlockCreateClusterFromEtcdBackup(ctx context.Context, rootClient *client.Client, talosAPIKeyPrepare TalosAPIKeyPrepareFunc, options Options, sourceClusterName, newClusterName, assertDeploymentNS, assertDeploymentName string, ) subTestList { //nolint:nolintlint,revive return subTestList{ @@ -211,13 +212,13 @@ func TestBlockCreateClusterFromEtcdBackup(ctx context.Context, rootClient *clien AssertKubernetesDeploymentHasRunningPods(ctx, rootClient.Management(), newClusterName, assertDeploymentNS, assertDeploymentName), }, ).Append( - TestBlockKubernetesDeploymentCreateAndRunning(ctx, rootClient.Management(), newClusterName, assertDeploymentNS, assertDeploymentName+"-after-restore")..., + AssertBlockKubernetesDeploymentCreateAndRunning(ctx, rootClient.Management(), newClusterName, assertDeploymentNS, assertDeploymentName+"-after-restore")..., ) } -// TestBlockKubernetesDeploymentCreateAndRunning is a reusable block of assertions that can be used to verify that a +// AssertBlockKubernetesDeploymentCreateAndRunning is a reusable block of assertions that can be used to verify that a // Kubernetes deployment is created and has running pods. -func TestBlockKubernetesDeploymentCreateAndRunning(ctx context.Context, managementClient *management.Client, clusterName, ns, name string) []subTest { //nolint:nolintlint,revive +func AssertBlockKubernetesDeploymentCreateAndRunning(ctx context.Context, managementClient *management.Client, clusterName, ns, name string) []subTest { //nolint:nolintlint,revive return []subTest{ { "KubernetesDeploymentShouldBeCreated", @@ -230,41 +231,34 @@ func TestBlockKubernetesDeploymentCreateAndRunning(ctx context.Context, manageme } } -// TestGroupClusterCreateAndReady is a reusable group of tests that can be used to verify that a cluster is created and ready. -func TestGroupClusterCreateAndReady( +// AssertClusterCreateAndReady is a reusable group of tests that can be used to verify that a cluster is created and ready. 
+func AssertClusterCreateAndReady( ctx context.Context, rootClient *client.Client, talosAPIKeyPrepare TalosAPIKeyPrepareFunc, - name, description string, + name string, options ClusterOptions, testOutputDir string, -) testGroup { //nolint:nolintlint,revive +) []subTest { //nolint:nolintlint,revive clusterName := "integration-" + name options.Name = clusterName - return testGroup{ - Name: strings.ToUpper(name[0:1]) + name[1:] + "Cluster", - Description: description, - Parallel: true, - MachineClaim: options.ControlPlanes + options.Workers, - Subtests: subTests( - subTest{ - "ClusterShouldBeCreated", - CreateCluster(ctx, rootClient, options), - }, - ).Append( - TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(ctx, rootClient, clusterName, options.MachineOptions.TalosVersion, options.MachineOptions.KubernetesVersion, talosAPIKeyPrepare)..., - ).Append( - subTest{ - "AssertSupportBundleContents", - AssertSupportBundleContents(ctx, rootClient, clusterName), - }, - ).Append( - subTest{ - "ClusterShouldBeDestroyed", - AssertDestroyCluster(ctx, rootClient.Omni().State(), clusterName, options.InfraProvider != "", false), - }, - ), - Finalizer: DestroyCluster(ctx, rootClient, testOutputDir, clusterName), - } + return subTests( + subTest{ + "ClusterShouldBeCreated", + CreateCluster(ctx, rootClient, options), + }, + ).Append( + AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady(ctx, rootClient, clusterName, options.MachineOptions.TalosVersion, options.MachineOptions.KubernetesVersion, talosAPIKeyPrepare)..., + ).Append( + subTest{ + "AssertSupportBundleContents", + AssertSupportBundleContents(ctx, rootClient, clusterName), + }, + ).Append( + subTest{ + "ClusterShouldBeDestroyed", + AssertDestroyCluster(ctx, rootClient.Omni().State(), clusterName, options.InfraProvider != "", false), + }, + ) } diff --git a/cmd/integration-test/pkg/tests/cleanup.go b/internal/integration/cleanup_test.go similarity index 95% rename from cmd/integration-test/pkg/tests/cleanup.go rename to internal/integration/cleanup_test.go index d8fbc1a6..ac857e0b 100644 --- a/cmd/integration-test/pkg/tests/cleanup.go +++ b/internal/integration/cleanup_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/cli.go b/internal/integration/cli_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/cli.go rename to internal/integration/cli_test.go index 0d0a9d40..ad344604 100644 --- a/cmd/integration-test/pkg/tests/cli.go +++ b/internal/integration/cli_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "bytes" diff --git a/cmd/integration-test/pkg/tests/cluster.go b/internal/integration/cluster_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/cluster.go rename to internal/integration/cluster_test.go index 46a3ec72..0741f4d2 100644 --- a/cmd/integration-test/pkg/tests/cluster.go +++ b/internal/integration/cluster_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. 
-package tests +//go:build integration + +package integration_test import ( "context" @@ -63,12 +65,6 @@ type ClusterOptions struct { SkipExtensionCheckOnCreate bool } -// MachineOptions are the options for machine creation. -type MachineOptions struct { - TalosVersion string - KubernetesVersion string -} - // CreateCluster verifies cluster creation. func CreateCluster(testCtx context.Context, cli *client.Client, options ClusterOptions) TestFunc { return func(t *testing.T) { diff --git a/cmd/integration-test/pkg/tests/common.go b/internal/integration/common_test.go similarity index 57% rename from cmd/integration-test/pkg/tests/common.go rename to internal/integration/common_test.go index 78e7adca..e194e8c6 100644 --- a/cmd/integration-test/pkg/tests/common.go +++ b/internal/integration/common_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" @@ -11,19 +13,24 @@ import ( "errors" "fmt" "net" + "net/http" "strings" + "testing" + "time" "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/safe" "github.com/cosi-project/runtime/pkg/state" talosclient "github.com/siderolabs/talos/pkg/machinery/client" - clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config" + talosclientconfig "github.com/siderolabs/talos/pkg/machinery/client/config" "github.com/siderolabs/talos/pkg/machinery/resources/cluster" + "golang.org/x/sync/semaphore" "github.com/siderolabs/omni/client/pkg/client" "github.com/siderolabs/omni/client/pkg/omni/resources" "github.com/siderolabs/omni/client/pkg/omni/resources/omni" "github.com/siderolabs/omni/internal/backend/runtime/talos" + "github.com/siderolabs/omni/internal/pkg/clientconfig" ) func resourceDetails(res resource.Resource) string { @@ -137,7 +144,7 @@ func talosClient(ctx context.Context, cli *client.Client, clusterName string) (* return nil, errors.New("empty talosconfig") } - config, err := clientconfig.FromBytes(data) + config, err := talosclientconfig.FromBytes(data) if err != nil { return nil, err } @@ -177,3 +184,127 @@ func talosNodeIPs(ctx context.Context, talosState state.State) ([]string, error) return nodeIPs, nil } + +//nolint:govet +type testGroup struct { + Name string + Description string + Parallel bool + MachineClaim int + Subtests []subTest + Finalizer func(t *testing.T) +} + +//nolint:govet +type subTest struct { + Name string + F func(t *testing.T) +} + +type subTestList []subTest + +func subTests(items ...subTest) subTestList { + return items +} + +func (l subTestList) Append(items ...subTest) subTestList { + return append(l, items...) +} + +// MachineOptions are the options for machine creation. +type MachineOptions struct { + TalosVersion string + KubernetesVersion string +} + +// TestFunc is a testing function prototype. +type TestFunc func(t *testing.T) + +// RestartAMachineFunc is a function to restart a machine by UUID. +type RestartAMachineFunc func(ctx context.Context, uuid string) error + +// WipeAMachineFunc is a function to wipe a machine by UUID. +type WipeAMachineFunc func(ctx context.Context, uuid string) error + +// FreezeAMachineFunc is a function to freeze a machine by UUID. +type FreezeAMachineFunc func(ctx context.Context, uuid string) error + +// HTTPRequestSignerFunc is function to sign the HTTP request. 
+type HTTPRequestSignerFunc func(ctx context.Context, req *http.Request) error + +// TalosAPIKeyPrepareFunc is a function to prepare a public key for Talos API auth. +type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error + +// Options for the test runner. +// +//nolint:govet +type Options struct { + CleanupLinks bool + SkipExtensionsCheckOnCreate bool + RunStatsCheck bool + ExpectedMachines int + + RestartAMachineFunc RestartAMachineFunc + WipeAMachineFunc WipeAMachineFunc + FreezeAMachineFunc FreezeAMachineFunc + ProvisionConfigs []MachineProvisionConfig + + MachineOptions MachineOptions + + HTTPEndpoint string + AnotherTalosVersion string + AnotherKubernetesVersion string + OmnictlPath string + ScalingTimeout time.Duration + StaticInfraProvider string + OutputDir string +} + +func (o Options) defaultInfraProvider() string { + if len(o.ProvisionConfigs) == 0 { + return "" + } + + return o.ProvisionConfigs[0].Provider.ID +} + +func (o Options) defaultProviderData() string { + if len(o.ProvisionConfigs) == 0 { + return "{}" + } + + return o.ProvisionConfigs[0].Provider.Data +} + +func (o Options) provisionMachines() bool { + var totalMachineCount int + + for _, cfg := range o.ProvisionConfigs { + totalMachineCount += cfg.MachineCount + } + + return totalMachineCount > 0 +} + +// MachineProvisionConfig tells the test to provision machines from the infra provider. +type MachineProvisionConfig struct { + Provider MachineProviderConfig `yaml:"provider"` + MachineCount int `yaml:"count"` +} + +// MachineProviderConfig keeps the configuration of the infra provider for the machine provision config. +type MachineProviderConfig struct { + ID string `yaml:"id"` + Data string `yaml:"data"` + Static bool `yaml:"static"` +} + +// TestOptions constains all common data that might be required to run the tests. +type TestOptions struct { + Options + omniClient *client.Client + talosAPIKeyPrepare TalosAPIKeyPrepareFunc + clientConfig *clientconfig.ClientConfig + + machineSemaphore *semaphore.Weighted +} diff --git a/cmd/integration-test/pkg/tests/config_patch.go b/internal/integration/config_patch_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/config_patch.go rename to internal/integration/config_patch_test.go index dc7be4b0..4670e3f9 100644 --- a/cmd/integration-test/pkg/tests/config_patch.go +++ b/internal/integration/config_patch_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/extensions.go b/internal/integration/extensions_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/extensions.go rename to internal/integration/extensions_test.go index 912eb9c9..1c253a22 100644 --- a/cmd/integration-test/pkg/tests/extensions.go +++ b/internal/integration/extensions_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. 
-package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/image.go b/internal/integration/image_test.go similarity index 97% rename from cmd/integration-test/pkg/tests/image.go rename to internal/integration/image_test.go index 321e47b0..54cb9809 100644 --- a/cmd/integration-test/pkg/tests/image.go +++ b/internal/integration/image_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/internal/integration/infra_test.go b/internal/integration/infra_test.go new file mode 100644 index 00000000..74a0b2e5 --- /dev/null +++ b/internal/integration/infra_test.go @@ -0,0 +1,359 @@ +// Copyright (c) 2025 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +//go:build integration + +package integration_test + +import ( + "context" + "fmt" + "slices" + "testing" + "time" + + "github.com/cosi-project/runtime/pkg/resource" + "github.com/cosi-project/runtime/pkg/resource/rtestutils" + "github.com/cosi-project/runtime/pkg/safe" + "github.com/cosi-project/runtime/pkg/state" + "github.com/siderolabs/go-retry/retry" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zaptest" + + "github.com/siderolabs/omni/client/api/omni/specs" + "github.com/siderolabs/omni/client/pkg/client" + "github.com/siderolabs/omni/client/pkg/omni/resources" + "github.com/siderolabs/omni/client/pkg/omni/resources/infra" + "github.com/siderolabs/omni/client/pkg/omni/resources/omni" + "github.com/siderolabs/omni/client/pkg/omni/resources/siderolink" +) + +// machineProvisionHook creates a machine request set and waits until all requests are fulfilled. 
+// +//nolint:gocognit +func machineProvisionHook(t *testing.T, client *client.Client, cfg MachineProvisionConfig, machineRequestSetName, + talosVersion string, +) { + ctx, cancel := context.WithTimeout(t.Context(), time.Minute*5) + defer cancel() + + rtestutils.AssertResources(ctx, t, client.Omni().State(), []string{cfg.Provider.ID}, func(*infra.ProviderStatus, *assert.Assertions) {}) + + machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, client.Omni().State(), machineRequestSetName) + + if !state.IsNotFoundError(err) { + require.NoError(t, err) + } + + if machineRequestSet != nil { + rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName}) + } + + machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName) + + machineRequestSet.TypedSpec().Value.Extensions = []string{ + "siderolabs/" + HelloWorldServiceExtensionName, + } + + machineRequestSet.TypedSpec().Value.ProviderId = cfg.Provider.ID + machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion + machineRequestSet.TypedSpec().Value.ProviderData = cfg.Provider.Data + machineRequestSet.TypedSpec().Value.MachineCount = int32(cfg.MachineCount) + + require.NoError(t, client.Omni().State().Create(ctx, machineRequestSet)) + + var resources safe.List[*infra.MachineRequestStatus] + + err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error { + resources, err = safe.ReaderListAll[*infra.MachineRequestStatus](ctx, client.Omni().State(), + state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)), + ) + if err != nil { + return err + } + + if resources.Len() != cfg.MachineCount { + return retry.ExpectedErrorf("provision machine count is %d, expected %d", resources.Len(), cfg.MachineCount) + } + + return nil + }) + + require.NoError(t, err) + + err = retry.Constant(time.Minute*5).RetryWithContext(ctx, func(ctx context.Context) error { + var machines safe.List[*omni.MachineStatus] + + machines, err = safe.ReaderListAll[*omni.MachineStatus](ctx, client.Omni().State()) + if err != nil { + return err + } + + if machines.Len() < cfg.MachineCount { + return retry.ExpectedErrorf("links count is %d, expected at least %d", machines.Len(), cfg.MachineCount) + } + + for r := range resources.All() { + requestedMachines := machines.FilterLabelQuery(resource.LabelEqual(omni.LabelMachineRequest, r.Metadata().ID())) + + if requestedMachines.Len() == 0 { + return retry.ExpectedErrorf("machine request %q doesn't have the related link", r.Metadata().ID()) + } + + if requestedMachines.Len() != 1 { + return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID()) + } + + m := requestedMachines.Get(0) + if m.TypedSpec().Value.Hardware == nil { + return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID()) + } + } + + return nil + }) + + require.NoError(t, err) +} + +// machineDeprovisionHook removes the machine request set and checks that all related links were deleted. 
+func machineDeprovisionHook(t *testing.T, client *client.Client, machineRequestSetName string) { + ctx, cancel := context.WithTimeout(t.Context(), time.Minute*5) + defer cancel() + + requestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](ctx, t, client.Omni().State(), + state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)), + ) + + links, err := safe.ReaderListAll[*siderolink.Link](ctx, client.Omni().State()) + + require.NoError(t, err) + + linkIDs := make([]string, 0, len(requestIDs)) + + for l := range links.All() { + mr, ok := l.Metadata().Labels().Get(omni.LabelMachineRequest) + if !ok { + continue + } + + if slices.Index(requestIDs, mr) != -1 { + linkIDs = append(linkIDs, l.Metadata().ID()) + } + } + + rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName}) + + for _, id := range requestIDs { + rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), id) + } + + for _, id := range linkIDs { + rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), id) + } +} + +// infraMachinesAcceptHook asserts that there are a certain number of machines that are not accepted, provisioned by the static infra provider with the given ID. +// +// It then accepts them all and asserts that the states of various resources are updated as expected. +func infraMachinesAcceptHook(t *testing.T, omniState state.State, infraProviderID string, expectedCount int, disableKexec bool) { + const disableKexecConfigPatch = `machine: + install: + extraKernelArgs: + - kexec_load_disabled=1 + sysctls: + kernel.kexec_load_disabled: "1"` + + logger := zaptest.NewLogger(t) + + ctx, cancel := context.WithTimeout(t.Context(), time.Minute*10) + defer cancel() + + linksMap := make(map[string]*siderolink.Link, expectedCount) + + err := retry.Constant(time.Minute*10).RetryWithContext(ctx, func(ctx context.Context) error { + links, err := safe.ReaderListAll[*siderolink.Link](ctx, omniState) + if err != nil { + return err + } + + discoveredLinks := 0 + + for link := range links.All() { + providerID, ok := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) + if !ok { + continue + } + + if infraProviderID == providerID { + discoveredLinks++ + } + + linksMap[link.Metadata().ID()] = link + } + + if discoveredLinks != expectedCount { + return retry.ExpectedErrorf("expected %d static infra provider machines, got %d", expectedCount, discoveredLinks) + } + + return nil + }) + + require.NoError(t, err) + + // link count should match the expected count + require.Equal(t, expectedCount, len(linksMap)) + + ids := make([]resource.ID, 0, len(linksMap)) + + for id := range linksMap { + ids = append(ids, id) + + rtestutils.AssertResource(ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) { + assertion.Equal(specs.InfraMachineConfigSpec_PENDING, res.TypedSpec().Value.AcceptanceStatus) + }) + + rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id) + + rtestutils.AssertNoResource[*omni.Machine](ctx, t, omniState, id) + + // Accept the machine + infraMachineConfig := omni.NewInfraMachineConfig(resources.DefaultNamespace, id) + + infraMachineConfig.TypedSpec().Value.AcceptanceStatus = specs.InfraMachineConfigSpec_ACCEPTED + + if disableKexec { + infraMachineConfig.TypedSpec().Value.ExtraKernelArgs = "kexec_load_disabled=1" + } + + require.NoError(t, omniState.Create(ctx, infraMachineConfig)) + + if disableKexec { + disableKexecConfigPatchRes := 
omni.NewConfigPatch(resources.DefaultNamespace, fmt.Sprintf("500-%s-disable-kexec", id)) + + disableKexecConfigPatchRes.Metadata().Labels().Set(omni.LabelMachine, id) + + require.NoError(t, disableKexecConfigPatchRes.TypedSpec().Value.SetUncompressedData([]byte(disableKexecConfigPatch))) + require.NoError(t, omniState.Create(ctx, disableKexecConfigPatchRes)) + } + } + + logger.Info("accepted machines", zap.Reflect("infra_provider_id", infraProviderID), zap.Strings("machine_ids", ids)) + + // Assert that the infra.Machines are now marked as accepted + rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.Machine, assertion *assert.Assertions) { + assertion.Equal(specs.InfraMachineConfigSpec_ACCEPTED, res.TypedSpec().Value.AcceptanceStatus) + }) + + // Assert that omni.Machine resources are now created and marked as managed by the static infra provider + rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) { + _, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider) + + assertion.True(isManagedByStaticInfraProvider) + }) + + // Assert that omni.Machine resources are now created + rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) { + _, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider) + + assertion.True(isManagedByStaticInfraProvider) + }) + + // Assert that infra.MachineStatus resources are now created, powered off, marked as ready to use, and the machine labels are set on them + rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.MachineStatus, assertion *assert.Assertions) { + aVal, _ := res.Metadata().Labels().Get("a") + assertion.Equal("b", aVal) + + _, cOk := res.Metadata().Labels().Get("c") + assertion.True(cOk) + + assertion.Equal(specs.InfraMachineStatusSpec_POWER_STATE_OFF, res.TypedSpec().Value.PowerState) + assertion.True(res.TypedSpec().Value.ReadyToUse) + }) + + // Assert the infra provider labels on MachineStatus resources + rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.MachineStatus, assertion *assert.Assertions) { + link := linksMap[res.Metadata().ID()] + + infraProviderID, _ := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) + + aLabel := fmt.Sprintf(omni.InfraProviderLabelPrefixFormat, infraProviderID) + "a" + aVal, _ := res.Metadata().Labels().Get(aLabel) + + assertion.Equal("b", aVal) + + cLabel := fmt.Sprintf(omni.InfraProviderLabelPrefixFormat, infraProviderID) + "c" + _, cOk := res.Metadata().Labels().Get(cLabel) + assertion.True(cOk) + }) +} + +// infraMachinesDestroyHook removes siderolink.Link resources for all machines managed by a static infra provider, +// and asserts that the related infra.Machine and infra.MachineStatus resources are deleted. 
+func infraMachinesDestroyHook(t *testing.T, omniState state.State, providerID string, count int) { + ctx, cancel := context.WithTimeout(t.Context(), time.Minute*10) + defer cancel() + + links, err := safe.StateListAll[*siderolink.Link](ctx, omniState) + require.NoError(t, err) + + var deleted int + + for link := range links.All() { + pid, ok := link.Metadata().Annotations().Get(omni.LabelInfraProviderID) + if !ok { + continue + } + + if pid != providerID { + continue + } + + id := link.Metadata().ID() + + rtestutils.Destroy[*siderolink.Link](ctx, t, omniState, []string{id}) + + rtestutils.AssertNoResource[*infra.Machine](ctx, t, omniState, id) + rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id) + + deleted++ + } + + require.EqualValues(t, count, deleted) +} + +// AssertInfraMachinesAreAllocated asserts that the machines that belong to the given cluster and managed by a static infra provider +// are marked as allocated in the related resources. +func AssertInfraMachinesAreAllocated(testCtx context.Context, omniState state.State, clusterID, talosVersion string, extensions []string) TestFunc { + return func(t *testing.T) { + ctx, cancel := context.WithTimeout(testCtx, time.Minute*10) + defer cancel() + + nodeList, err := safe.StateListAll[*omni.MachineSetNode](ctx, omniState, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterID))) + require.NoError(t, err) + + require.Greater(t, nodeList.Len(), 0) + + for machineSetNode := range nodeList.All() { + id := machineSetNode.Metadata().ID() + + // There must be an infra.Machine resource for each node + rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) { + assertion.Equal(talosVersion, res.TypedSpec().Value.ClusterTalosVersion) + assertion.Empty(res.TypedSpec().Value.WipeId) + assertion.Equal(extensions, res.TypedSpec().Value.Extensions) + }) + + // The machine is allocated, so it will be powered on and be ready to use + rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) { + assertion.Equal(specs.InfraMachineStatusSpec_POWER_STATE_ON, res.TypedSpec().Value.PowerState) + assertion.True(res.TypedSpec().Value.ReadyToUse) + assertion.True(res.TypedSpec().Value.Installed) + }) + } + } +} diff --git a/internal/integration/integration_test.go b/internal/integration/integration_test.go new file mode 100644 index 00000000..1bace46d --- /dev/null +++ b/internal/integration/integration_test.go @@ -0,0 +1,348 @@ +// Copyright (c) 2025 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +//go:build integration + +package integration_test + +import ( + "context" + "errors" + "flag" + "fmt" + "io" + "net/url" + "os" + "os/exec" + "testing" + "time" + + "github.com/cosi-project/runtime/pkg/safe" + "github.com/cosi-project/runtime/pkg/state" + "github.com/mattn/go-shellwords" + "github.com/stretchr/testify/require" + "golang.org/x/sync/semaphore" + "gopkg.in/yaml.v3" + + clientconsts "github.com/siderolabs/omni/client/pkg/constants" + "github.com/siderolabs/omni/client/pkg/omni/resources/siderolink" + _ "github.com/siderolabs/omni/cmd/acompat" // this package should always be imported first for init->set env to work + "github.com/siderolabs/omni/internal/pkg/clientconfig" + "github.com/siderolabs/omni/internal/pkg/constants" +) + +// Flag values. 
+var ( + omniEndpoint string + restartAMachineScript string + wipeAMachineScript string + freezeAMachineScript string + omnictlPath string + talosVersion string + anotherTalosVersion string + kubernetesVersion string + anotherKubernetesVersion string + expectedMachines int + + // provisioning flags + provisionMachinesCount int + infraProvider string + providerData string + provisionConfigFile string + + scalingTimeout time.Duration + + cleanupLinks bool + runStatsCheck bool + skipExtensionsCheckOnCreate bool + artifactsOutputDir string +) + +func TestIntegration(t *testing.T) { + machineOptions := MachineOptions{ + TalosVersion: talosVersion, + KubernetesVersion: kubernetesVersion, + } + + options := Options{ + ExpectedMachines: expectedMachines, + CleanupLinks: cleanupLinks, + RunStatsCheck: runStatsCheck, + SkipExtensionsCheckOnCreate: skipExtensionsCheckOnCreate, + + MachineOptions: machineOptions, + AnotherTalosVersion: anotherTalosVersion, + AnotherKubernetesVersion: anotherKubernetesVersion, + OmnictlPath: omnictlPath, + ScalingTimeout: scalingTimeout, + OutputDir: artifactsOutputDir, + } + + if provisionConfigFile != "" { + f, err := os.Open(provisionConfigFile) + + require.NoError(t, err, "failed to open provision config file") + + decoder := yaml.NewDecoder(f) + + for { + var cfg MachineProvisionConfig + + if err = decoder.Decode(&cfg); err != nil { + if errors.Is(err, io.EOF) { + break + } + + require.NoError(t, err, "failed to parse provision config file") + } + + options.ProvisionConfigs = append(options.ProvisionConfigs, cfg) + } + } else { + options.ProvisionConfigs = append(options.ProvisionConfigs, + MachineProvisionConfig{ + MachineCount: provisionMachinesCount, + Provider: MachineProviderConfig{ + ID: infraProvider, + Data: providerData, + }, + }, + ) + } + + if restartAMachineScript != "" { + parsedScript, err := shellwords.Parse(restartAMachineScript) + require.NoError(t, err, "failed to parse restart-a-machine-script file") + + options.RestartAMachineFunc = func(ctx context.Context, uuid string) error { + return execCmd(ctx, parsedScript, uuid) + } + } + + if wipeAMachineScript != "" { + parsedScript, err := shellwords.Parse(wipeAMachineScript) + require.NoError(t, err, "failed to parse wipe-a-machine-script file") + + options.WipeAMachineFunc = func(ctx context.Context, uuid string) error { + return execCmd(ctx, parsedScript, uuid) + } + } + + if freezeAMachineScript != "" { + parsedScript, err := shellwords.Parse(freezeAMachineScript) + require.NoError(t, err, "failed to parse freeze-a-machine-script file") + + options.FreezeAMachineFunc = func(ctx context.Context, uuid string) error { + return execCmd(ctx, parsedScript, uuid) + } + } + + u, err := url.Parse(omniEndpoint) + require.NoError(t, err, "error parsing omni endpoint") + + if u.Scheme == "grpc" { + u.Scheme = "http" + } + + options.HTTPEndpoint = u.String() + + clientConfig := clientconfig.New(omniEndpoint) + + t.Cleanup(func() { + clientConfig.Close() //nolint:errcheck + }) + + rootClient, err := clientConfig.GetClient(t.Context()) + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, rootClient.Close()) + }) + + talosAPIKeyPrepare := func(ctx context.Context, contextName string) error { + return clientconfig.TalosAPIKeyPrepare(ctx, rootClient, contextName) + } + + if !clientconsts.IsDebugBuild { + // noop for non-debug builds + talosAPIKeyPrepare = func(context.Context, string) error { + return nil + } + } + + testOptions := &TestOptions{ + omniClient: rootClient, + talosAPIKeyPrepare: 
talosAPIKeyPrepare,
+ Options: options,
+ machineSemaphore: semaphore.NewWeighted(int64(options.ExpectedMachines)),
+ clientConfig: clientConfig,
+ }
+
+ preRunHooks(t, testOptions)
+
+ t.Run("Suites", func(t *testing.T) {
+ t.Run("CleanState", testCleanState(testOptions))
+ t.Run("TalosImageGeneration", testImageGeneration(testOptions))
+ t.Run("CLICommands", testCLICommands(testOptions))
+ t.Run("KubernetesNodeAudit", testKubernetesNodeAudit(testOptions))
+ t.Run("ForcedMachineRemoval", testForcedMachineRemoval(testOptions))
+ t.Run("ImmediateClusterDestruction", testImmediateClusterDestruction(testOptions))
+ t.Run("DefaultCluster", testDefaultCluster(testOptions))
+ t.Run("EncryptedCluster", testEncryptedCluster(testOptions))
+ t.Run("SinglenodeCluster", testSinglenodeCluster(testOptions))
+ t.Run("ScaleUpAndDown", testScaleUpAndDown(testOptions))
+ t.Run("ScaleUpAndDownMachineClassBasedMachineSets", testScaleUpAndDownMachineClassBasedMachineSets(testOptions))
+ t.Run("ScaleUpAndDownAutoProvisionMachineSets", testScaleUpAndDownAutoProvisionMachineSets(testOptions))
+ t.Run("RollingUpdateParallelism", testRollingUpdateParallelism(testOptions))
+ t.Run("ReplaceControlPlanes", testReplaceControlPlanes(testOptions))
+ t.Run("ConfigPatching", testConfigPatching(testOptions))
+ t.Run("TalosUpgrades", testTalosUpgrades(testOptions))
+ t.Run("KubernetesUpgrades", testKubernetesUpgrades(testOptions))
+ t.Run("EtcdBackupAndRestore", testEtcdBackupAndRestore(testOptions))
+ t.Run("MaintenanceUpgrade", testMaintenanceUpgrade(testOptions))
+ t.Run("Auth", testAuth(testOptions))
+ t.Run("ClusterTemplate", testClusterTemplate(testOptions))
+ t.Run("WorkloadProxy", testWorkloadProxy(testOptions))
+ t.Run("StaticInfraProvider", testStaticInfraProvider(testOptions))
+ })
+
+ postRunHooks(t, testOptions)
+}
+
+func init() {
+ flag.StringVar(&omniEndpoint, "omni.endpoint", "grpc://127.0.0.1:8080", "The endpoint of the Omni API.")
+ flag.IntVar(&expectedMachines, "omni.expected-machines", 4, "minimum number of machines expected")
+ flag.StringVar(&restartAMachineScript, "omni.restart-a-machine-script", "hack/test/restart-a-vm.sh", "a script to run to restart a machine by UUID (optional)")
+ flag.StringVar(&wipeAMachineScript, "omni.wipe-a-machine-script", "hack/test/wipe-a-vm.sh", "a script to run to wipe a machine by UUID (optional)")
+ flag.StringVar(&freezeAMachineScript, "omni.freeze-a-machine-script", "hack/test/freeze-a-vm.sh", "a script to run to freeze a machine by UUID (optional)")
+ flag.StringVar(&omnictlPath, "omni.omnictl-path", "_out/omnictl-linux-amd64", "omnictl CLI script path (optional)")
+ flag.StringVar(&anotherTalosVersion, "omni.another-talos-version",
+ constants.AnotherTalosVersion,
+ "Talos version for upgrade test",
+ )
+ flag.StringVar(
+ &talosVersion,
+ "omni.talos-version",
+ clientconsts.DefaultTalosVersion,
+ "installer version for workload clusters",
+ )
+ flag.StringVar(&kubernetesVersion, "omni.kubernetes-version", constants.DefaultKubernetesVersion, "Kubernetes version for workload clusters")
+ flag.StringVar(&anotherKubernetesVersion, "omni.another-kubernetes-version", constants.AnotherKubernetesVersion, "Kubernetes version for upgrade tests")
+ flag.BoolVar(&cleanupLinks, "omni.cleanup-links", false, "remove all links after the tests are complete")
+ flag.BoolVar(&runStatsCheck, "omni.run-stats-check", false, "runs stats check after the test is complete")
+ flag.IntVar(&provisionMachinesCount, "omni.provision-machines", 0, "provisions machines through the infrastructure provider")
+ flag.StringVar(&infraProvider, "omni.infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines")
+ flag.StringVar(&providerData, "omni.provider-data", "{}", "the infra provider machine template data to use")
+ flag.DurationVar(&scalingTimeout, "omni.scale-timeout", time.Second*150, "scale up test timeout")
+ flag.StringVar(&provisionConfigFile, "omni.provision-config-file", "", "provision machines with the more complicated configuration")
+ flag.BoolVar(&skipExtensionsCheckOnCreate, "omni.skip-extensions-check-on-create", false,
+ "disables checking for hello-world-service extension on the machine allocation and in the upgrade tests")
+ flag.StringVar(&artifactsOutputDir, "omni.output-dir", "/tmp/integration-test", "output directory for the files generated by the test, e.g., the support bundles")
+}
+
+func execCmd(ctx context.Context, parsedScript []string, args ...string) error {
+ cmd := exec.CommandContext(ctx, parsedScript[0], append(parsedScript[1:], args...)...)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+
+ return cmd.Run()
+}
+
+func (opts *TestOptions) claimMachines(t *testing.T, count int) {
+ require.GreaterOrEqual(t, expectedMachines, count)
+
+ t.Logf("attempting to acquire semaphore for %d machines", count)
+
+ if err := opts.machineSemaphore.Acquire(t.Context(), int64(count)); err != nil {
+ t.Fatalf("failed to acquire machine semaphore: %s", err)
+ }
+
+ t.Logf("acquired semaphore for %d machines", count)
+
+ t.Cleanup(func() {
+ t.Logf("releasing semaphore for %d machines", count)
+
+ opts.machineSemaphore.Release(int64(count))
+ })
+}
+
+func runTests(t *testing.T, tests []subTest) {
+ for _, tt := range tests {
+ t.Run(tt.Name, tt.F)
+ }
+}
+
+func preRunHooks(t *testing.T, options *TestOptions) {
+ if !options.provisionMachines() {
+ return
+ }
+
+ for i, cfg := range options.ProvisionConfigs {
+ if cfg.Provider.Static {
+ infraMachinesAcceptHook(t, options.omniClient.Omni().State(), cfg.Provider.ID, cfg.MachineCount, true)
+
+ continue
+ }
+
+ t.Logf("provision %d machines using provider %q, machine request set name provisioned%d",
+ cfg.MachineCount,
+ cfg.Provider.ID,
+ i,
+ )
+
+ machineProvisionHook(
+ t,
+ options.omniClient,
+ cfg,
+ fmt.Sprintf("provisioned%d", i),
+ options.MachineOptions.TalosVersion,
+ )
+ }
+}
+
+func postRunHooks(t *testing.T, options *TestOptions) {
+ if options.provisionMachines() {
+ for i, cfg := range options.ProvisionConfigs {
+ if cfg.Provider.Static {
+ infraMachinesDestroyHook(t, options.omniClient.Omni().State(), cfg.Provider.ID, cfg.MachineCount)
+
+ continue
+ }
+
+ machineDeprovisionHook(t, options.omniClient, fmt.Sprintf("provisioned%d", i))
+ }
+ }
+
+ if options.RunStatsCheck {
+ t.Log("checking controller stats for the write and read spikes")
+
+ statsLimitsHook(t)
+ }
+
+ if options.CleanupLinks {
+ require.NoError(t, cleanupLinksFunc(t.Context(), options.omniClient.Omni().State()))
+ }
+}
+
+func cleanupLinksFunc(ctx context.Context, st state.State) error {
+ links, err := safe.ReaderListAll[*siderolink.Link](ctx, st)
+ if err != nil {
+ return err
+ }
+
+ var cancel context.CancelFunc
+
+ ctx, cancel = context.WithTimeout(ctx, time.Minute)
+ defer cancel()
+
+ return links.ForEachErr(func(r *siderolink.Link) error {
+ err := st.TeardownAndDestroy(ctx, r.Metadata())
+ if err != nil && !state.IsNotFoundError(err) {
+ return err
+ }
+
+ return nil
+ })
+}
diff --git a/cmd/integration-test/pkg/tests/kubernetes_node_audit.go
b/internal/integration/kubernetes_node_audit_test.go similarity index 93% rename from cmd/integration-test/pkg/tests/kubernetes_node_audit.go rename to internal/integration/kubernetes_node_audit_test.go index a9982602..acb1ea73 100644 --- a/cmd/integration-test/pkg/tests/kubernetes_node_audit.go +++ b/internal/integration/kubernetes_node_audit_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" @@ -20,7 +22,6 @@ import ( "go.uber.org/zap/zaptest" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/siderolabs/omni/client/pkg/client" "github.com/siderolabs/omni/client/pkg/omni/resources/omni" ) @@ -31,7 +32,9 @@ import ( // 3. Assert that the ClusterMachine resource is deleted - the ClusterMachineTeardownController did not block its deletion despite failing to remove the node from Kubernetes. // 4. Wake the control plane back up. // 5. Assert that the worker node eventually gets removed from Kubernetes due to node audit. -func AssertKubernetesNodeAudit(ctx context.Context, st state.State, clusterName string, omniClient *client.Client, options Options) TestFunc { +func AssertKubernetesNodeAudit(ctx context.Context, clusterName string, options *TestOptions) TestFunc { + st := options.omniClient.Omni().State() + return func(t *testing.T) { if options.FreezeAMachineFunc == nil || options.RestartAMachineFunc == nil { t.Skip("skip the test as FreezeAMachineFunc or RestartAMachineFunc is not set") @@ -82,7 +85,7 @@ func AssertKubernetesNodeAudit(ctx context.Context, st state.State, clusterName require.NoError(t, options.RestartAMachineFunc(ctx, id)) } - kubernetesClient := getKubernetesClient(ctx, t, omniClient.Management(), clusterName) + kubernetesClient := getKubernetesClient(ctx, t, options.omniClient.Management(), clusterName) logger.Info("assert that the node is removed from Kubernetes due to node audit") diff --git a/cmd/integration-test/pkg/tests/kubernetes.go b/internal/integration/kubernetes_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/kubernetes.go rename to internal/integration/kubernetes_test.go index 32c08241..edd503e8 100644 --- a/cmd/integration-test/pkg/tests/kubernetes.go +++ b/internal/integration/kubernetes_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/machines.go b/internal/integration/machines_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/machines.go rename to internal/integration/machines_test.go index c3a272d7..4c08c320 100644 --- a/cmd/integration-test/pkg/tests/machines.go +++ b/internal/integration/machines_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. 
-package tests +//go:build integration + +package integration_test import ( "bufio" diff --git a/cmd/integration-test/pkg/tests/maintenance.go b/internal/integration/maintenance_test.go similarity index 97% rename from cmd/integration-test/pkg/tests/maintenance.go rename to internal/integration/maintenance_test.go index 3de13c04..445368fa 100644 --- a/cmd/integration-test/pkg/tests/maintenance.go +++ b/internal/integration/maintenance_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/omniconfig.go b/internal/integration/omniconfig_test.go similarity index 93% rename from cmd/integration-test/pkg/tests/omniconfig.go rename to internal/integration/omniconfig_test.go index 9f7d59e5..1f084208 100644 --- a/cmd/integration-test/pkg/tests/omniconfig.go +++ b/internal/integration/omniconfig_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/rolling_update.go b/internal/integration/rolling_update_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/rolling_update.go rename to internal/integration/rolling_update_test.go index 350c4eee..207a0276 100644 --- a/cmd/integration-test/pkg/tests/rolling_update.go +++ b/internal/integration/rolling_update_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/cmd/integration-test/pkg/tests/siderolink.go b/internal/integration/siderolink_test.go similarity index 98% rename from cmd/integration-test/pkg/tests/siderolink.go rename to internal/integration/siderolink_test.go index 6bb147fb..cc69ee55 100644 --- a/cmd/integration-test/pkg/tests/siderolink.go +++ b/internal/integration/siderolink_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "context" diff --git a/internal/integration/stats_test.go b/internal/integration/stats_test.go new file mode 100644 index 00000000..36210f47 --- /dev/null +++ b/internal/integration/stats_test.go @@ -0,0 +1,143 @@ +// Copyright (c) 2025 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +//go:build integration + +package integration_test + +import ( + "context" + "errors" + "fmt" + "sort" + "strings" + "testing" + "time" + + "github.com/prometheus/client_golang/api" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + "github.com/siderolabs/go-retry/retry" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// statsLimitsHook checks that metrics don't show any spikes of resource reads/writes, controller wakeups. +// This test should only be run after the integration tests set with Talemu enabled as the thresholds are adjusted for it. +// Should have Prometheus running on 9090. 
+func statsLimitsHook(t *testing.T) { + for _, tt := range []struct { + check func(assert *assert.Assertions, value float64) + name string + query string + }{ + { + name: "resource CRUD", + query: `sum(omni_resource_operations_total{operation=~"create|update", type!="MachineStatusLinks.omni.sidero.dev"})`, + check: func(assert *assert.Assertions, value float64) { + limit := float64(12000) + + assert.Lessf(value, limit, "resource CRUD operations were expected to be less than %f. "+ + "If the limit is exceeded not because of a leak but because you added some new resources/controllers, adjust the limit accordingly.", limit) + }, + }, + { + name: "queue length", + query: `sum(omni_runtime_qcontroller_queue_length)`, + check: func(assert *assert.Assertions, value float64) { assert.Zero(value) }, + }, + { + name: "controller wakeups", + query: `sum(omni_runtime_controller_wakeups{controller!="MachineStatusLinkController"})`, + check: func(assert *assert.Assertions, value float64) { + limit := float64(12000) + + assert.Lessf(value, limit, "controller wakeups were expected to be less than %f. "+ + "If the limit is exceeded not because of a leak but because you added some new resources/controllers, adjust the limit accordingly.", limit) + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*16) + defer cancel() + + err := retry.Constant(time.Second * 15).Retry(func() error { + promClient, err := api.NewClient(api.Config{ + Address: "http://127.0.0.1:9090", + }) + if err != nil { + return retry.ExpectedError(err) + } + + var ( + value model.Value + warnings v1.Warnings + ) + + agg := assertionAggregator{} + + v1api := v1.NewAPI(promClient) + + value, warnings, err = v1api.Query(ctx, tt.query, time.Now()) + if err != nil { + return retry.ExpectedError(err) + } + + if len(warnings) > 0 { + return retry.ExpectedErrorf("prometheus query had warnings %#v", warnings) + } + + assert := assert.New(&agg) + + switch val := value.(type) { + case *model.Scalar: + tt.check(assert, float64(val.Value)) + case model.Vector: + tt.check(assert, float64(val[val.Len()-1].Value)) + default: + return fmt.Errorf("unexpected value type %s", val.Type()) + } + + if agg.hadErrors { + return retry.ExpectedError(errors.New(agg.String())) + } + + return nil + }) + + require.NoError(t, err) + }) + } +} + +type assertionAggregator struct { + errors map[string]struct{} + hadErrors bool +} + +func (agg *assertionAggregator) Errorf(format string, args ...any) { + errorString := fmt.Sprintf(format, args...) + + if agg.errors == nil { + agg.errors = map[string]struct{}{} + } + + agg.errors[errorString] = struct{}{} + agg.hadErrors = true +} + +func (agg *assertionAggregator) String() string { + lines := make([]string, 0, len(agg.errors)) + + for errorString := range agg.errors { + lines = append(lines, " * "+errorString) + } + + sort.Strings(lines) + + return strings.Join(lines, "\n") +} diff --git a/internal/integration/suites_test.go b/internal/integration/suites_test.go new file mode 100644 index 00000000..d0016455 --- /dev/null +++ b/internal/integration/suites_test.go @@ -0,0 +1,1373 @@ +// Copyright (c) 2025 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+ +//go:build integration + +package integration_test + +import ( + "context" + "net/http" + "testing" + "time" + + "github.com/siderolabs/omni/client/api/omni/specs" + "github.com/siderolabs/omni/client/pkg/omni/resources/omni" + "github.com/siderolabs/omni/internal/pkg/clientconfig" + "google.golang.org/protobuf/types/known/durationpb" +) + +type assertClusterReadyOptions struct { + talosVersion string + kubernetesVersion string +} + +type assertClusterReadyOption func(*assertClusterReadyOptions) + +func withTalosVersion(version string) assertClusterReadyOption { + return func(acro *assertClusterReadyOptions) { + acro.talosVersion = version + } +} + +func withKubernetesVersion(version string) assertClusterReadyOption { + return func(acro *assertClusterReadyOptions) { + acro.kubernetesVersion = version + } +} + +func assertClusterAndAPIReady(t *testing.T, clusterName string, options *TestOptions, opts ...assertClusterReadyOption) { + optionsStruct := assertClusterReadyOptions{ + talosVersion: options.MachineOptions.TalosVersion, + kubernetesVersion: options.MachineOptions.KubernetesVersion, + } + + for _, o := range opts { + o(&optionsStruct) + } + + runTests(t, AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady( + t.Context(), + options.omniClient, + clusterName, + optionsStruct.talosVersion, + optionsStruct.kubernetesVersion, + options.talosAPIKeyPrepare, + )) +} + +func testCleanState(options *TestOptions) TestFunc { + return func(t *testing.T) { + ctx := t.Context() + + t.Log(` +Bring the state of Omni to a clean state by removing all clusters, config patches, etc. which might have been left from previous runs. +Wait for all expected machines to join and be in maintenance mode.`) + + t.Run( + "DestroyAllClusterRelatedResources", + DestroyAllClusterRelatedResources(ctx, options.omniClient.Omni().State()), + ) + + // machine discovery, all machines should be in maintenance mode + t.Run( + "LinkCountShouldMatchExpectedMachines", + AssertNumberOfLinks(ctx, options.omniClient.Omni().State(), expectedMachines), + ) + + t.Run( + "LinksShouldBeConnected", + AssertLinksConnected(ctx, options.omniClient.Omni().State()), + ) + + t.Run( + "LinksShouldMatchMachines", + AssertMachinesMatchLinks(ctx, options.omniClient.Omni().State()), + ) + + t.Run( + "MachinesShouldHaveLogs", + AssertMachinesHaveLogs(ctx, options.omniClient.Omni().State(), options.omniClient.Management()), + ) + + t.Run( + "MachinesShouldBeReachableInMaintenanceMode", + AssertTalosMaintenanceAPIAccessViaOmni(ctx, options.omniClient, options.talosAPIKeyPrepare), + ) + + t.Run( + "MachinesShouldBeInMaintenanceMode", + AssertMachineStatus(ctx, options.omniClient.Omni().State(), true, "", map[string]string{ + omni.MachineStatusLabelConnected: "", + omni.MachineStatusLabelReportingEvents: "", + omni.MachineStatusLabelAvailable: "", + // QEMU-specific labels which should always match, others are specific to the settings (number of cores, etc.) 
+ omni.MachineStatusLabelCPU: "qemu", + omni.MachineStatusLabelArch: "amd64", + omni.MachineStatusLabelPlatform: "metal", + }, nil), + ) + } +} + +func testImageGeneration(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Parallel() + + t.Log(` +Generate various Talos images with Omni and try to download them.`) + + t.Run( + "TalosImagesShouldBeDownloadableUsingCLI", + AssertDownloadUsingCLI(t.Context(), options.omniClient, options.OmnictlPath, options.HTTPEndpoint), + ) + + t.Run( + "TalosImagesShouldBeDownloadable", + AssertSomeImagesAreDownloadable(t.Context(), options.omniClient, func(ctx context.Context, req *http.Request) error { + return clientconfig.SignHTTPRequest(ctx, options.omniClient, req) + }, options.HTTPEndpoint), + ) + } +} + +func testCLICommands(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Parallel() + + t.Log(` +Verify various omnictl commands.`) + + t.Run( + "OmnictlUserCLIShouldWork", + AssertUserCLI(t.Context(), options.omniClient, options.OmnictlPath, options.HTTPEndpoint), + ) + } +} + +func testKubernetesNodeAudit(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Parallel() + + clusterName := "integration-k8s-node-audit" + + options.claimMachines(t, 2) + + t.Log(` +Test the auditing of the Kubernetes nodes, i.e. when a node is gone from the Omni perspective but still exists on the Kubernetes cluster.`) + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 1, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + runTests( + t, + AssertBlockClusterAndTalosAPIAndKubernetesShouldBeReady( + t.Context(), + options.omniClient, + clusterName, + options.MachineOptions.TalosVersion, + options.MachineOptions.KubernetesVersion, + options.talosAPIKeyPrepare, + ), + ) + + t.Run( + "KubernetesNodeAuditShouldBePerformed", + AssertKubernetesNodeAudit( + t.Context(), + clusterName, + options, + ), + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testForcedMachineRemoval(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests different scenarios for forced Machine removal (vs. graceful removing from a cluster): + +- force remove a Machine which is not allocated (not part of any cluster) +- force remove a worker Machine which is part of the cluster +- force remove a control plane Machine which is part of the cluster, and replace with a new Machine. + +These tests simulate a hardware failure of a Machine which requires a forced removal from Omni. 
+ +In the tests, we wipe and reboot the VMs to bring them back as available for the next test.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-forced-removal" + + assertClusterReady := func() { + runTests(t, AssertBlockClusterShouldBeReady( + t.Context(), + options.omniClient, + clusterName, + options.MachineOptions.TalosVersion, + options.talosAPIKeyPrepare, + )) + } + + t.Run( + "UnallocatedMachinesShouldBeDestroyable", + AssertUnallocatedMachineDestroyFlow(t.Context(), options.omniClient.Omni().State(), options.RestartAMachineFunc), + ) + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterReady() + + t.Run( + "WorkerNodesShouldBeForceRemovable", + AssertForceRemoveWorkerNode(t.Context(), options.omniClient.Omni().State(), clusterName, options.FreezeAMachineFunc, options.WipeAMachineFunc), + ) + + assertClusterReady() + + t.Run( + "ControlPlaneNodeShouldBeForceReplaceable", + AssertControlPlaneForceReplaceMachine( + t.Context(), + options.omniClient.Omni().State(), + clusterName, + options.Options, + ), + ) + + assertClusterReady() + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testImmediateClusterDestruction(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Regression test: create a cluster and destroy it without waiting for the cluster to reach any state.`) + + t.Parallel() + + options.claimMachines(t, 3) + + clusterName := "integration-immediate" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 2, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testDefaultCluster(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Create a regular 3 + 2 cluster with HA controlplane, assert that the cluster is ready and accessible. +Don't do any changes to the cluster.`) + + t.Parallel() + + clusterOptions := ClusterOptions{ + ControlPlanes: 3, + Workers: 2, + + MachineOptions: options.MachineOptions, + } + + options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers) + + runTests(t, AssertClusterCreateAndReady( + t.Context(), + options.omniClient, + options.talosAPIKeyPrepare, + "default", + clusterOptions, + options.OutputDir, + )) + } +} + +func testEncryptedCluster(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Create a 1 + 1 cluster and enable disk encryption via Omni as a KMS. 
+Don't do any changes to the cluster.`) + + t.Parallel() + + clusterOptions := ClusterOptions{ + ControlPlanes: 1, + Workers: 1, + + MachineOptions: options.MachineOptions, + Features: &specs.ClusterSpec_Features{ + DiskEncryption: true, + }, + } + + options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers) + + runTests(t, AssertClusterCreateAndReady( + t.Context(), + options.omniClient, + options.talosAPIKeyPrepare, + "encrypted", + clusterOptions, + options.OutputDir, + )) + } +} + +func testSinglenodeCluster(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Create a single node cluster. +Don't do any changes to the cluster.`) + + t.Parallel() + + clusterOptions := ClusterOptions{ + ControlPlanes: 1, + Workers: 0, + + MachineOptions: options.MachineOptions, + } + + options.claimMachines(t, clusterOptions.ControlPlanes+clusterOptions.Workers) + + runTests(t, AssertClusterCreateAndReady( + t.Context(), + options.omniClient, + options.talosAPIKeyPrepare, + "singlenode", + clusterOptions, + options.OutputDir, + )) + } +} + +func testScaleUpAndDown(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests scaling up and down a cluster: + +- create a 1+0 cluster +- scale up to 1+1 +- scale up to 3+1 +- scale down to 3+0 +- scale down to 1+0 + +In between the scaling operations, assert that the cluster is ready and accessible.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-scaling" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeAdded", + ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: 1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeAdded", + ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 2, + Workers: 0, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeRemoved", + ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: -1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeRemoved", + ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: -2, + Workers: 0, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testScaleUpAndDownMachineClassBasedMachineSets(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests scaling up and down a cluster using machine classes: + +- create a 1+0 
cluster +- scale up to 1+1 +- scale up to 3+1 +- scale down to 3+0 +- scale down to 1+0 + +In between the scaling operations, assert that the cluster is ready and accessible.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-scaling-machine-class-based-machine-sets" + + t.Run( + "ClusterShouldBeCreated", + CreateClusterWithMachineClass(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeAdded", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: 1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeAdded", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 2, + Workers: 0, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeRemoved", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: -1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeRemoved", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: -2, + Workers: 0, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testScaleUpAndDownAutoProvisionMachineSets(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests scaling up and down a cluster using infrastructure provisioner: + +- create a 1+0 cluster +- scale up to 1+1 +- scale up to 3+1 +- scale down to 3+0 +- scale down to 1+0 + +In between the scaling operations, assert that the cluster is ready and accessible.`) + + t.Parallel() + + clusterName := "integration-scaling-auto-provision" + + t.Run( + "ClusterShouldBeCreated", + CreateClusterWithMachineClass(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + InfraProvider: options.defaultInfraProvider(), + + MachineOptions: options.MachineOptions, + ProviderData: options.defaultProviderData(), + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeAdded", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: 1, + InfraProvider: options.defaultInfraProvider(), + MachineOptions: options.MachineOptions, + ProviderData: options.defaultProviderData(), + ScalingTimeout: options.ScalingTimeout, + }), + ) + + 
assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeAdded", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 2, + Workers: 0, + InfraProvider: options.defaultInfraProvider(), + MachineOptions: options.MachineOptions, + ProviderData: options.defaultProviderData(), + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "OneWorkerShouldBeRemoved", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 0, + Workers: -1, + InfraProvider: options.defaultInfraProvider(), + MachineOptions: options.MachineOptions, + ProviderData: options.defaultProviderData(), + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "TwoControlPlanesShouldBeRemoved", + ScaleClusterMachineSets(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: -2, + Workers: 0, + InfraProvider: options.defaultInfraProvider(), + MachineOptions: options.MachineOptions, + ProviderData: options.defaultProviderData(), + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, true, false), + ) + } +} + +func testRollingUpdateParallelism(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests rolling update & scale down strategies for concurrency control for worker machine sets. + +- create a 1+3 cluster +- update the worker configs with rolling strategy using maxParallelism of 2 +- scale down the workers to 0 with rolling strategy using maxParallelism of 2 +- assert that the maxParallelism of 2 was respected and used in both operations.`) + + t.Parallel() + + clusterName := "integration-rolling-update-parallelism" + + options.claimMachines(t, 4) + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 3, + + MachineOptions: options.MachineOptions, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "WorkersUpdateShouldBeRolledOutWithMaxParallelism", + AssertWorkerNodesRollingConfigUpdate(t.Context(), options.omniClient, clusterName, 2), + ) + + t.Run( + "WorkersShouldScaleDownWithMaxParallelism", + AssertWorkerNodesRollingScaleDown(t.Context(), options.omniClient, clusterName, 2), + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testReplaceControlPlanes(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests replacing control plane nodes: + +- create a 1+0 cluster +- scale up to 2+0, and immediately remove the first control plane node + +In between the scaling operations, assert that the cluster is ready and accessible.`) + + t.Parallel() + + options.claimMachines(t, 2) + + clusterName := "integration-replace-cp" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + 
assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ControlPlanesShouldBeReplaced", + ReplaceControlPlanes(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + + MachineOptions: options.MachineOptions, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testConfigPatching(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests applying various config patching, including "broken" config patches which should not apply.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-config-patching" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "LargeImmediateConfigPatchShouldBeAppliedAndRemoved", + AssertLargeImmediateConfigApplied(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "MachineSetConfigPatchShouldBeAppliedAndRemoved", + AssertConfigPatchMachineSet(t.Context(), options.omniClient, clusterName), + ) + + t.Run( + "SingleClusterMachineConfigPatchShouldBeAppliedAndRemoved", + AssertConfigPatchSingleClusterMachine(t.Context(), options.omniClient, clusterName), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ConfigPatchWithRebootShouldBeApplied", + AssertConfigPatchWithReboot(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "InvalidConfigPatchShouldNotBeApplied", + AssertConfigPatchWithInvalidConfig(t.Context(), options.omniClient, clusterName, options.talosAPIKeyPrepare), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testTalosUpgrades(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests upgrading Talos version, including reverting a failed upgrade.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-talos-upgrade" + + machineOptions := MachineOptions{ + TalosVersion: options.AnotherTalosVersion, + KubernetesVersion: options.AnotherKubernetesVersion, // use older Kubernetes compatible with AnotherTalosVersion + } + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + MachineOptions: machineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options, withTalosVersion(machineOptions.TalosVersion), withKubernetesVersion(machineOptions.KubernetesVersion)) + + if !options.SkipExtensionsCheckOnCreate { + t.Run( + "HelloWorldServiceExtensionShouldBePresent", + AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, HelloWorldServiceExtensionName), + ) + } + + t.Run( + "TalosSchematicUpdateShouldSucceed", + 
AssertTalosSchematicUpdateFlow(t.Context(), options.omniClient, clusterName), + ) + + t.Run( + "QemuGuestAgentExtensionShouldBePresent", + AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, QemuGuestAgentExtensionName), + ) + + t.Run( + "ClusterBootstrapManifestSyncShouldBeSuccessful", + KubernetesBootstrapManifestSync(t.Context(), options.omniClient.Management(), clusterName), + ) + + t.Run( + "TalosUpgradeShouldSucceed", + AssertTalosUpgradeFlow(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion), + ) + + t.Run( + "ClusterBootstrapManifestSyncShouldBeSuccessful", + KubernetesBootstrapManifestSync(t.Context(), options.omniClient.Management(), clusterName), + ) + + if !options.SkipExtensionsCheckOnCreate { + t.Run( + "HelloWorldServiceExtensionShouldBePresent", + AssertExtensionIsPresent(t.Context(), options.omniClient, clusterName, HelloWorldServiceExtensionName), + ) + } + + assertClusterAndAPIReady(t, clusterName, options, withTalosVersion(options.MachineOptions.TalosVersion), withKubernetesVersion(machineOptions.KubernetesVersion)) + + t.Run( + "FailedTalosUpgradeShouldBeRevertible", + AssertTalosUpgradeIsRevertible(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion), + ) + + t.Run( + "RunningTalosUpgradeShouldBeCancelable", + AssertTalosUpgradeIsCancelable(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.TalosVersion, options.AnotherTalosVersion), + ) + + assertClusterAndAPIReady(t, clusterName, options, withKubernetesVersion(machineOptions.KubernetesVersion)) + + t.Run( + "MaintenanceTestConfigShouldStillBePresent", + AssertMaintenanceTestConfigIsPresent(t.Context(), options.omniClient.Omni().State(), clusterName, 0), // check the maintenance config in the first machine + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testKubernetesUpgrades(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests upgrading Kubernetes version, including reverting a failed upgrade.`) + + t.Parallel() + + options.claimMachines(t, 4) + + clusterName := "integration-k8s-upgrade" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + MachineOptions: MachineOptions{ + TalosVersion: options.MachineOptions.TalosVersion, + KubernetesVersion: options.AnotherKubernetesVersion, + }, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options, withKubernetesVersion(options.AnotherKubernetesVersion)) + + t.Run( + "KubernetesUpgradeShouldSucceed", + AssertKubernetesUpgradeFlow( + t.Context(), options.omniClient.Omni().State(), options.omniClient.Management(), + clusterName, + options.MachineOptions.KubernetesVersion, + ), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "FailedKubernetesUpgradeShouldBeRevertible", + AssertKubernetesUpgradeIsRevertible(t.Context(), options.omniClient.Omni().State(), clusterName, options.MachineOptions.KubernetesVersion), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testEtcdBackupAndRestore(options 
*TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests automatic & manual backup & restore for workload etcd. + +Automatic backups are enabled, done, and then a manual backup is created. +Afterwards, a cluster's control plane is destroyed then recovered from the backup. + +Finally, a completely new cluster is created using the same backup to test the "point-in-time recovery".`) + + t.Parallel() + + options.claimMachines(t, 6) + + clusterName := "integration-etcd-backup" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + EtcdBackup: &specs.EtcdBackupConf{ + Interval: durationpb.New(2 * time.Hour), + Enabled: true, + }, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + runTests(t, + AssertBlockKubernetesDeploymentCreateAndRunning(t.Context(), options.omniClient.Management(), + clusterName, + "default", + "test", + ), + ) + + t.Run( + "KubernetesSecretShouldBeCreated", + AssertKubernetesSecretIsCreated(t.Context(), options.omniClient.Management(), + clusterName, "default", "test", "backup-test-secret-val"), + ) + + t.Run( + "EtcdAutomaticBackupShouldBeCreated", + AssertEtcdAutomaticBackupIsCreated(t.Context(), options.omniClient.Omni().State(), clusterName), + ) + + t.Run( + "EtcdManualBackupShouldBeCreated", + AssertEtcdManualBackupIsCreated(t.Context(), options.omniClient.Omni().State(), clusterName), + ) + + secondClusterName := "integration-etcd-backup-new-cluster" + + runTests( + t, + AssertBlockCreateClusterFromEtcdBackup(t.Context(), options.omniClient, options.talosAPIKeyPrepare, options.Options, + clusterName, + secondClusterName, + "default", + "test", + ), + ) + + t.Run( + "EtcdSecretShouldBeSameAfterCreateFromBackup", + AssertKubernetesSecretHasValue(t.Context(), options.omniClient.Management(), secondClusterName, "default", "test", "backup-test-secret-val"), + ) + + t.Run( + "NewClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), secondClusterName, false, false), + ) + + runTests( + t, + AssertBlockRestoreEtcdFromLatestBackup(t.Context(), options.omniClient, options.talosAPIKeyPrepare, options.Options, + 3, + clusterName, + "default", + "test", + ), + ) + + t.Run( + "RestoredClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testMaintenanceUpgrade(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Test upgrading (downgrading) a machine in maintenance mode. + +Create a cluster out of a single machine on version1, remove cluster (the machine will stay on version1, Talos is installed). 
+Create a cluster out of the same machine on version2, Omni should upgrade the machine to version2 while in maintenance.`) + + t.Parallel() + + options.claimMachines(t, 1) + + t.Run( + "MachineShouldBeUpgradedInMaintenanceMode", + AssertMachineShouldBeUpgradedInMaintenanceMode( + t.Context(), options.omniClient, + "integration-maintenance-upgrade", + options.AnotherKubernetesVersion, + options.MachineOptions.TalosVersion, + options.AnotherTalosVersion, + options.talosAPIKeyPrepare, + ), + ) + } +} + +func testAuth(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Test authorization on accessing Omni API, some tests run without a cluster, some only run with a context of a cluster.`) + + t.Parallel() + + options.claimMachines(t, 1) + + t.Run( + "AnonymousRequestShouldBeDenied", + AssertAnonymousAuthenication(t.Context(), options.omniClient), + ) + + t.Run( + "InvalidSignatureShouldBeDenied", + AssertAPIInvalidSignature(t.Context(), options.omniClient), + ) + + t.Run( + "PublicKeyWithoutLifetimeShouldNotBeRegistered", + AssertPublicKeyWithoutLifetimeNotRegistered(t.Context(), options.omniClient), + ) + + t.Run( + "PublicKeyWithLongLifetimeShouldNotBeRegistered", + AssertPublicKeyWithLongLifetimeNotRegistered(t.Context(), options.omniClient), + ) + + t.Run( + "OmniconfigShouldBeDownloadable", + AssertOmniconfigDownload(t.Context(), options.omniClient), + ) + + t.Run( + "PublicKeyWithUnknownEmailShouldNotBeRegistered", + AssertRegisterPublicKeyWithUnknownEmail(t.Context(), options.omniClient), + ) + + t.Run( + "ServiceAccountAPIShouldWork", + AssertServiceAccountAPIFlow(t.Context(), options.omniClient), + ) + + t.Run( + "ResourceAuthzShouldWork", + AssertResourceAuthz(t.Context(), options.omniClient, options.clientConfig), + ) + + t.Run( + "ResourceAuthzWithACLShouldWork", + AssertResourceAuthzWithACL(t.Context(), options.omniClient, options.clientConfig), + ) + + clusterName := "integration-auth" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + Features: &specs.ClusterSpec_Features{ + UseEmbeddedDiscoveryService: true, + }, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "APIAuthorizationShouldBeTested", + AssertAPIAuthz(t.Context(), options.omniClient, options.clientConfig, clusterName), + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testClusterTemplate(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Test flow of cluster creation and scaling using cluster templates.`) + + t.Parallel() + + options.claimMachines(t, 5) + + t.Run( + "TestClusterTemplateFlow", + AssertClusterTemplateFlow(t.Context(), options.omniClient.Omni().State(), options.MachineOptions), + ) + } +} + +func testWorkloadProxy(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Test workload service proxying feature`) + + t.Parallel() + + options.claimMachines(t, 1) + + clusterName := "integration-workload-proxy" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + Features: &specs.ClusterSpec_Features{ + EnableWorkloadProxy: true, + }, + + 
MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: options.SkipExtensionsCheckOnCreate, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "WorkloadProxyShouldBeTested", + AssertWorkloadProxy(t.Context(), options.omniClient, clusterName), + ) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, false), + ) + } +} + +func testStaticInfraProvider(options *TestOptions) TestFunc { + return func(t *testing.T) { + t.Log(` +Tests common Omni operations on machines created by a static infrastructure provider. +Note: this test expects all machines to be provisioned by the bare-metal infra provider as it doesn't filter them. + +- create a 1+0 cluster - assert that cluster is healthy and ready +- scale it up to be 3+1 - assert that cluster is healthy and ready +- assert that machines are not ready to use (occupied) +- scale it down to be 1+0 - assert that cluster is healthy and ready +- destroy the cluster - assert that machines are wiped, then marked as ready to use +- create a new 3+1 cluster +- assert that cluster is healthy and ready +- remove links of the machines +`) + t.Parallel() + + clusterName := "integration-static-infra-provider" + + t.Run( + "ClusterShouldBeCreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 1, + Workers: 0, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: true, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeScaledUp", + ScaleClusterUp(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: 2, + Workers: 1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ExtensionsShouldBeUpdated", + UpdateExtensions(t.Context(), options.omniClient, clusterName, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}), + ) + + t.Run( + "MachinesShouldBeAllocated", + AssertInfraMachinesAreAllocated(t.Context(), options.omniClient.Omni().State(), clusterName, + options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}), + ) + + t.Run( + "ClusterShouldBeScaledDown", + ScaleClusterDown(t.Context(), options.omniClient.Omni().State(), ClusterOptions{ + Name: clusterName, + ControlPlanes: -2, + Workers: -1, + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, true), + ) + + t.Run( + "ClusterShouldBeRecreated", + CreateCluster(t.Context(), options.omniClient, ClusterOptions{ + Name: clusterName, + ControlPlanes: 3, + Workers: 1, + + MachineOptions: options.MachineOptions, + ScalingTimeout: options.ScalingTimeout, + + SkipExtensionCheckOnCreate: true, + }), + ) + + assertClusterAndAPIReady(t, clusterName, options) + + t.Run( + "ClusterShouldBeDestroyed", + AssertDestroyCluster(t.Context(), options.omniClient.Omni().State(), clusterName, false, true), + ) + } +} diff --git a/cmd/integration-test/pkg/tests/talos.go b/internal/integration/talos_test.go similarity index 98% rename from cmd/integration-test/pkg/tests/talos.go rename to
internal/integration/talos_test.go index 3980f4e1..bde8e645 100644 --- a/cmd/integration-test/pkg/tests/talos.go +++ b/internal/integration/talos_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "archive/zip" @@ -636,9 +638,11 @@ func AssertTalosUpgradeIsCancelable(testCtx context.Context, st state.State, clu events := make(chan state.Event) - require.NoError(t, st.WatchKind(ctx, omni.NewClusterMachineStatus(resources.DefaultNamespace, "").Metadata(), events), - state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterName)), - ) + t.Logf("watching for the machines in cluster %q", clusterName) + + require.NoError(t, st.WatchKind(ctx, omni.NewClusterMachineStatus(resources.DefaultNamespace, "").Metadata(), events, + state.WatchWithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterName)), + )) ids := []string{} @@ -679,6 +683,8 @@ func AssertTalosUpgradeIsCancelable(testCtx context.Context, st state.State, clu ids = append(ids, res.Metadata().ID()) + t.Logf("found machine %q, labels %#v", res.Metadata().ID(), res.Metadata().Labels()) + break outer } } diff --git a/cmd/integration-test/pkg/tests/template.go b/internal/integration/template_test.go similarity index 99% rename from cmd/integration-test/pkg/tests/template.go rename to internal/integration/template_test.go index dcc6bb9a..cc0e8e13 100644 --- a/cmd/integration-test/pkg/tests/template.go +++ b/internal/integration/template_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. -package tests +//go:build integration + +package integration_test import ( "bytes" diff --git a/cmd/integration-test/pkg/tests/testdata/cluster-1.tmpl.yaml b/internal/integration/testdata/cluster-1.tmpl.yaml similarity index 100% rename from cmd/integration-test/pkg/tests/testdata/cluster-1.tmpl.yaml rename to internal/integration/testdata/cluster-1.tmpl.yaml diff --git a/cmd/integration-test/pkg/tests/testdata/cluster-2.tmpl.yaml b/internal/integration/testdata/cluster-2.tmpl.yaml similarity index 100% rename from cmd/integration-test/pkg/tests/testdata/cluster-2.tmpl.yaml rename to internal/integration/testdata/cluster-2.tmpl.yaml diff --git a/cmd/integration-test/pkg/tests/testdata/sidero-labs-icon.svg b/internal/integration/testdata/sidero-labs-icon.svg similarity index 100% rename from cmd/integration-test/pkg/tests/testdata/sidero-labs-icon.svg rename to internal/integration/testdata/sidero-labs-icon.svg diff --git a/cmd/integration-test/pkg/tests/workload_proxy.go b/internal/integration/workload_proxy_test.go similarity index 98% rename from cmd/integration-test/pkg/tests/workload_proxy.go rename to internal/integration/workload_proxy_test.go index 2ca97635..666e7b32 100644 --- a/cmd/integration-test/pkg/tests/workload_proxy.go +++ b/internal/integration/workload_proxy_test.go @@ -3,7 +3,9 @@ // Use of this software is governed by the Business Source License // included in the LICENSE file. 
-package tests +//go:build integration + +package integration_test import ( "compress/gzip" @@ -33,8 +35,8 @@ import ( "github.com/siderolabs/omni/client/pkg/client" "github.com/siderolabs/omni/client/pkg/omni/resources/omni" - "github.com/siderolabs/omni/cmd/integration-test/pkg/clientconfig" "github.com/siderolabs/omni/internal/backend/workloadproxy" + "github.com/siderolabs/omni/internal/pkg/clientconfig" ) //go:embed testdata/sidero-labs-icon.svg diff --git a/cmd/integration-test/pkg/clientconfig/clientconfig.go b/internal/pkg/clientconfig/clientconfig.go similarity index 100% rename from cmd/integration-test/pkg/clientconfig/clientconfig.go rename to internal/pkg/clientconfig/clientconfig.go diff --git a/cmd/integration-test/pkg/clientconfig/register_key_debug.go b/internal/pkg/clientconfig/register_key_debug.go similarity index 100% rename from cmd/integration-test/pkg/clientconfig/register_key_debug.go rename to internal/pkg/clientconfig/register_key_debug.go diff --git a/cmd/integration-test/pkg/clientconfig/register_key_no_debug.go b/internal/pkg/clientconfig/register_key_no_debug.go similarity index 100% rename from cmd/integration-test/pkg/clientconfig/register_key_no_debug.go rename to internal/pkg/clientconfig/register_key_no_debug.go