test: run infra integration tests against Talemu provider

The Talemu-based tests now set up a `MachineRequestSet` with 30 machines
before the test run and tear it down afterwards.

New test blocks validate the creation and deletion of links and machine requests.
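
For illustration, roughly how a run with provisioning is invoked: the test runner
gains `--provision-machines` and `--infra-provider` flags (the provider ID defaults
to "talemu"), and a pre-run step creates the "main" `MachineRequestSet` from them.
A sketch, with `ARTIFACTS` and `TALOS_VERSION` as placeholders:

    # Sketch only: assumes a running Omni instance and the Talemu infra provider.
    SSL_CERT_DIR=hack/certs:/etc/ssl/certs \
    ${ARTIFACTS}/integration-test-linux-amd64 \
        --talos-version=${TALOS_VERSION} \
        --expected-machines=30 \
        --provision-machines=30 \
        --infra-provider=talemu \
        --run-stats-check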

Fixes: https://github.com/siderolabs/omni/issues/366

Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
Artem Chernyshev 2024-09-09 22:45:30 +03:00
parent f83cf3b210
commit 81e08eb38b
4 changed files with 217 additions and 35 deletions


@@ -0,0 +1,153 @@
// Copyright (c) 2024 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

package tests

import (
	"context"
	"fmt"
	"slices"
	"testing"
	"time"

	"github.com/cosi-project/runtime/pkg/resource"
	"github.com/cosi-project/runtime/pkg/resource/rtestutils"
	"github.com/cosi-project/runtime/pkg/safe"
	"github.com/cosi-project/runtime/pkg/state"
	"github.com/siderolabs/go-retry/retry"
	"github.com/stretchr/testify/require"

	"github.com/siderolabs/omni/client/pkg/client"
	"github.com/siderolabs/omni/client/pkg/omni/resources"
	"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
	"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
	"github.com/siderolabs/omni/client/pkg/omni/resources/siderolink"
)
// AssertMachinesShouldBeProvisioned creates a machine request set and waits until all requests are fulfilled.
//
//nolint:gocognit
func AssertMachinesShouldBeProvisioned(testCtx context.Context, client *client.Client, machineCount int, machineRequestSetName,
	talosVersion, infraProvider string,
) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, client.Omni().State(), machineRequestSetName)
		if !state.IsNotFoundError(err) {
			require.NoError(t, err)
		}

		if machineRequestSet != nil {
			rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})
		}

		machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName)

		machineRequestSet.TypedSpec().Value.Extensions = []string{
			"siderolabs/" + HelloWorldServiceExtensionName,
		}

		machineRequestSet.TypedSpec().Value.ProviderId = infraProvider
		machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion
		machineRequestSet.TypedSpec().Value.MachineCount = int32(machineCount)

		require.NoError(t, client.Omni().State().Create(ctx, machineRequestSet))

		var resources safe.List[*infra.MachineRequestStatus]

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			resources, err = safe.ReaderListAll[*infra.MachineRequestStatus](ctx, client.Omni().State(),
				state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
			)
			if err != nil {
				return err
			}

			if resources.Len() != machineCount {
				return retry.ExpectedErrorf("provision machine count is %d, expected %d", resources.Len(), machineCount)
			}

			return nil
		})
		require.NoError(t, err)

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			var machines safe.List[*omni.MachineStatus]

			machines, err = safe.ReaderListAll[*omni.MachineStatus](ctx, client.Omni().State())
			if err != nil {
				return err
			}

			if machines.Len() < machineCount {
return retry.ExpectedErrorf("links count is %d, expected at least %d", resources.Len(), machineCount)
			}

			for r := range resources.All() {
				requestedMachines := machines.FilterLabelQuery(resource.LabelEqual(omni.LabelMachineRequest, r.Metadata().ID()))

				if requestedMachines.Len() == 0 {
					return retry.ExpectedErrorf("machine request %q doesn't have the related link", r.Metadata().ID())
				}

				if requestedMachines.Len() != 1 {
					return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID())
				}

				m := requestedMachines.Get(0)
				if m.TypedSpec().Value.Hardware == nil {
					return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID())
				}
			}

			return nil
		})
		require.NoError(t, err)
	}
}
// AssertMachinesShouldBeDeprovisioned removes the machine request set and checks that all related links were deleted.
func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client, machineRequestSetName string) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		requestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](ctx, t, client.Omni().State(),
			state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
		)

		links, err := safe.ReaderListAll[*siderolink.Link](ctx, client.Omni().State())
		require.NoError(t, err)

		linkIDs := make([]string, 0, len(requestIDs))

		for l := range links.All() {
			mr, ok := l.Metadata().Labels().Get(omni.LabelMachineRequest)
			if !ok {
				continue
			}

			if slices.Index(requestIDs, mr) != -1 {
				linkIDs = append(linkIDs, l.Metadata().ID())
			}
		}

		rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})

		for _, id := range requestIDs {
			rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), id)
		}

		for _, id := range linkIDs {
			rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), id)
		}
	}
}


@@ -8,7 +8,6 @@ package tests
 import (
 	"context"
-	"errors"
 	"fmt"
 	"log"
 	"net/http"
@@ -54,9 +53,10 @@ type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error
 type Options struct {
 	RunTestPattern string

 	CleanupLinks bool
 	RunStatsCheck bool
 	ExpectedMachines int
+	ProvisionMachines int

 	RestartAMachineFunc RestartAMachineFunc
 	WipeAMachineFunc WipeAMachineFunc
@@ -68,6 +68,7 @@ type Options struct {
 	AnotherTalosVersion string
 	AnotherKubernetesVersion string
 	OmnictlPath string
+	InfraProvider string
 }

 // Run the integration tests.
@@ -1144,8 +1145,6 @@ Test flow of cluster creation and scaling using cluster templates.`,
 	var re *regexp.Regexp

 	if options.RunTestPattern != "" {
-		var err error
-
 		if re, err = regexp.Compile(options.RunTestPattern); err != nil {
 			log.Printf("run test pattern parse error: %s", err)
@@ -1180,33 +1179,47 @@ Test flow of cluster creation and scaling using cluster templates.`,
 		}
 	}

+	preRunTests := []testing.InternalTest{}
+
+	if options.ProvisionMachines != 0 {
+		preRunTests = append(preRunTests, testing.InternalTest{
+			Name: "AssertMachinesShouldBeProvisioned",
+			F: AssertMachinesShouldBeProvisioned(ctx, rootClient, options.ProvisionMachines, "main", options.MachineOptions.TalosVersion, options.InfraProvider),
+		})
+	}
+
+	if len(preRunTests) > 0 {
+		if err = runTests(preRunTests); err != nil {
+			return err
+		}
+	}
+
 	machineSemaphore := semaphore.NewWeighted(int64(options.ExpectedMachines))

-	exitCode := testing.MainStart(
-		matchStringOnly(func(string, string) (bool, error) { return true, nil }),
-		makeTests(ctx, testsToRun, machineSemaphore),
-		nil,
-		nil,
-		nil,
-	).Run()
+	if err = runTests(makeTests(ctx, testsToRun, machineSemaphore)); err != nil {
+		return err
+	}

-	extraTests := []testing.InternalTest{}
+	postRunTests := []testing.InternalTest{}
+
+	if options.ProvisionMachines != 0 {
+		postRunTests = append(postRunTests, testing.InternalTest{
+			Name: "AssertMachinesShouldBeDeprovisioned",
+			F: AssertMachinesShouldBeDeprovisioned(ctx, rootClient, "main"),
+		})
+	}

 	if options.RunStatsCheck {
-		extraTests = append(extraTests, testing.InternalTest{
+		postRunTests = append(postRunTests, testing.InternalTest{
 			Name: "AssertStatsLimits",
 			F: AssertStatsLimits(ctx),
 		})
 	}

-	if len(extraTests) > 0 && exitCode == 0 {
-		exitCode = testing.MainStart(
-			matchStringOnly(func(string, string) (bool, error) { return true, nil }),
-			extraTests,
-			nil,
-			nil,
-			nil,
-		).Run()
+	if len(postRunTests) > 0 {
+		if err = runTests(postRunTests); err != nil {
+			return err
+		}
 	}

 	if options.CleanupLinks {
@@ -1215,8 +1228,20 @@ Test flow of cluster creation and scaling using cluster templates.`,
 		}
 	}

+	return nil
+}
+
+func runTests(testsToRun []testing.InternalTest) error {
+	exitCode := testing.MainStart(
+		matchStringOnly(func(string, string) (bool, error) { return true, nil }),
+		testsToRun,
+		nil,
+		nil,
+		nil,
+	).Run()
+
 	if exitCode != 0 {
-		return errors.New("test failed")
+		return fmt.Errorf("test failed")
 	}

 	return nil


@@ -42,14 +42,16 @@ var rootCmd = &cobra.Command{
 		testOptions := tests.Options{
 			RunTestPattern: rootCmdFlags.runTestPattern,

 			ExpectedMachines: rootCmdFlags.expectedMachines,
 			CleanupLinks: rootCmdFlags.cleanupLinks,
 			RunStatsCheck: rootCmdFlags.runStatsCheck,
+			ProvisionMachines: rootCmdFlags.provisionMachinesCount,

 			MachineOptions: rootCmdFlags.machineOptions,
 			AnotherTalosVersion: rootCmdFlags.anotherTalosVersion,
 			AnotherKubernetesVersion: rootCmdFlags.anotherKubernetesVersion,
 			OmnictlPath: rootCmdFlags.omnictlPath,
+			InfraProvider: rootCmdFlags.infraProvider,
 		}
@@ -116,11 +118,13 @@ func execCmd(ctx context.Context, parsedScript []string, args ...string) error {
 var rootCmdFlags struct {
 	endpoint string
 	runTestPattern string
+	infraProvider string

-	expectedMachines int
-	parallel int64
-	cleanupLinks bool
-	runStatsCheck bool
+	provisionMachinesCount int
+	expectedMachines int
+	parallel int64
+	cleanupLinks bool
+	runStatsCheck bool

 	testsTimeout time.Duration
@@ -158,6 +162,8 @@ func init() {
 	rootCmd.Flags().DurationVarP(&rootCmdFlags.testsTimeout, "timeout", "t", time.Hour, "tests global timeout")
 	rootCmd.Flags().BoolVar(&rootCmdFlags.cleanupLinks, "cleanup-links", false, "remove all links after the tests are complete")
 	rootCmd.Flags().BoolVar(&rootCmdFlags.runStatsCheck, "run-stats-check", false, "runs stats check after the test is complete")
+	rootCmd.Flags().IntVar(&rootCmdFlags.provisionMachinesCount, "provision-machines", 0, "provisions machines through the infrastructure provider")
+	rootCmd.Flags().StringVar(&rootCmdFlags.infraProvider, "infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines")
 }

 // withContext wraps with CLI context.


@@ -129,9 +129,7 @@ KERNEL_ARGS="siderolink.api=grpc://$LOCAL_IP:8090?jointoken=${JOIN_TOKEN} talos.
 if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
   PROMETHEUS_CONTAINER=$(docker run --network host -p "9090:9090" -v "$(pwd)/hack/compose/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" -it --rm -d prom/prometheus)
-  TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu:latest --kernel-args="${KERNEL_ARGS}" --machines=30)
-
-  sleep 10
+  TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu-infra-provider:latest --create-service-account --omni-api-endpoint=https://$LOCAL_IP:8099)

   SSL_CERT_DIR=hack/certs:/etc/ssl/certs \
   ${ARTIFACTS}/integration-test-linux-amd64 \
@@ -139,7 +137,7 @@ if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
     --talos-version=${TALOS_VERSION} \
     --omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \
     --expected-machines=30 \
-    --cleanup-links \
+    --provision-machines=30 \
     --run-stats-check \
     -t 4m \
     -p 10 \