It seems that CRI has a bit of eventual consistency, and it might fail to remove a stopped pod, claiming that it's still running. Rewrite the upgrade API call in the upgrade test to actually wait for the upgrade to be successful, and fail immediately if it's not. This should improve test stability and make it easier to spot issues immediately. Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//go:build integration_provision

package provision

import (
	"context"
	"encoding/json"
	"fmt"
	"net/netip"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/siderolabs/gen/slices"
	"github.com/siderolabs/go-blockdevice/blockdevice/encryption"
	"github.com/siderolabs/go-kubernetes/kubernetes/upgrade"
	"github.com/siderolabs/go-retry/retry"
	sideronet "github.com/siderolabs/net"
	"github.com/stretchr/testify/suite"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/strategicpatch"

	"github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers"
	"github.com/siderolabs/talos/internal/integration/base"
	"github.com/siderolabs/talos/pkg/cluster/check"
	"github.com/siderolabs/talos/pkg/cluster/kubernetes"
	"github.com/siderolabs/talos/pkg/cluster/sonobuoy"
	"github.com/siderolabs/talos/pkg/images"
	machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
	talosclient "github.com/siderolabs/talos/pkg/machinery/client"
	clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
	"github.com/siderolabs/talos/pkg/machinery/config"
	"github.com/siderolabs/talos/pkg/machinery/config/bundle"
	"github.com/siderolabs/talos/pkg/machinery/config/generate"
	"github.com/siderolabs/talos/pkg/machinery/config/machine"
	"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
	"github.com/siderolabs/talos/pkg/machinery/constants"
	"github.com/siderolabs/talos/pkg/machinery/nethelpers"
	"github.com/siderolabs/talos/pkg/provision"
	"github.com/siderolabs/talos/pkg/provision/access"
	"github.com/siderolabs/talos/pkg/provision/providers/qemu"
)

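// upgradeSpec describes a single upgrade test scenario: the source cluster to
// provision and the target Talos/Kubernetes versions to upgrade it to.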
//nolint:maligned
type upgradeSpec struct {
	ShortName string

	SourceKernelPath     string
	SourceInitramfsPath  string
	SourceInstallerImage string
	SourceVersion        string
	SourceK8sVersion     string

	TargetInstallerImage string
	TargetVersion        string
	TargetK8sVersion     string

	SkipKubeletUpgrade bool

	MasterNodes int
	WorkerNodes int

	UpgradePreserve bool
	UpgradeStage    bool
	WithEncryption  bool
}

const (
	// These versions should be kept in sync with the Makefile variable RELEASES.
	previousRelease = "v1.3.7"
	stableRelease   = "v1.4.5" // or soon-to-be-stable
	// The current version (the one being built on CI) is DefaultSettings.CurrentVersion.

	// Command to find the Kubernetes version shipped with a past release:
	//
	//	git show ${TAG}:pkg/machinery/constants/constants.go | grep KubernetesVersion
	previousK8sVersion = "1.26.2" // constants.DefaultKubernetesVersion in the previousRelease
	stableK8sVersion   = "1.27.3" // constants.DefaultKubernetesVersion in the stableRelease
	currentK8sVersion  = constants.DefaultKubernetesVersion
)

var defaultNameservers = []netip.Addr{netip.MustParseAddr("8.8.8.8"), netip.MustParseAddr("1.1.1.1")}

// upgradePreviousToStable upgrades from the previous Talos release to the stable release.
func upgradePreviousToStable() upgradeSpec {
	return upgradeSpec{
		ShortName: fmt.Sprintf("%s-%s", previousRelease, stableRelease),

		SourceKernelPath: helpers.ArtifactPath(filepath.Join(trimVersion(previousRelease), constants.KernelAsset)),
		SourceInitramfsPath: helpers.ArtifactPath(
			filepath.Join(
				trimVersion(previousRelease),
				constants.InitramfsAsset,
			),
		),
		SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", previousRelease),
		SourceVersion:        previousRelease,
		SourceK8sVersion:     previousK8sVersion,

		TargetInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
		TargetVersion:        stableRelease,
		TargetK8sVersion:     stableK8sVersion,

		MasterNodes: DefaultSettings.MasterNodes,
		WorkerNodes: DefaultSettings.WorkerNodes,
	}
}

// upgradeStableToCurrent upgrades from the stable Talos release to the current version.
func upgradeStableToCurrent() upgradeSpec {
	return upgradeSpec{
		ShortName: fmt.Sprintf("%s-%s", stableRelease, DefaultSettings.CurrentVersion),

		SourceKernelPath:     helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
		SourceInitramfsPath:  helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
		SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
		SourceVersion:        stableRelease,
		SourceK8sVersion:     stableK8sVersion,

		TargetInstallerImage: fmt.Sprintf(
			"%s/%s:%s",
			DefaultSettings.TargetInstallImageRegistry,
			images.DefaultInstallerImageName,
			DefaultSettings.CurrentVersion,
		),
		TargetVersion:    DefaultSettings.CurrentVersion,
		TargetK8sVersion: currentK8sVersion,

		MasterNodes: DefaultSettings.MasterNodes,
		WorkerNodes: DefaultSettings.WorkerNodes,
	}
}

// upgradeCurrentToCurrent upgrades the current version to itself.
func upgradeCurrentToCurrent() upgradeSpec {
	installerImage := fmt.Sprintf(
		"%s/%s:%s",
		DefaultSettings.TargetInstallImageRegistry,
		images.DefaultInstallerImageName,
		DefaultSettings.CurrentVersion,
	)

	return upgradeSpec{
		ShortName: fmt.Sprintf("%s-same-ver", DefaultSettings.CurrentVersion),

		SourceKernelPath:     helpers.ArtifactPath(constants.KernelAssetWithArch),
		SourceInitramfsPath:  helpers.ArtifactPath(constants.InitramfsAssetWithArch),
		SourceInstallerImage: installerImage,
		SourceVersion:        DefaultSettings.CurrentVersion,
		SourceK8sVersion:     currentK8sVersion,

		TargetInstallerImage: installerImage,
		TargetVersion:        DefaultSettings.CurrentVersion,
		TargetK8sVersion:     currentK8sVersion,

		MasterNodes: DefaultSettings.MasterNodes,
		WorkerNodes: DefaultSettings.WorkerNodes,

		WithEncryption: true,
	}
}

// upgradeStableToCurrentPreserve upgrades from the stable Talos release to the current version on a single-node cluster with preserve enabled.
func upgradeStableToCurrentPreserve() upgradeSpec {
	return upgradeSpec{
		ShortName: fmt.Sprintf("prsrv-%s-%s", stableRelease, DefaultSettings.CurrentVersion),

		SourceKernelPath:     helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
		SourceInitramfsPath:  helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
		SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
		SourceVersion:        stableRelease,
		SourceK8sVersion:     stableK8sVersion,

		TargetInstallerImage: fmt.Sprintf(
			"%s/%s:%s",
			DefaultSettings.TargetInstallImageRegistry,
			images.DefaultInstallerImageName,
			DefaultSettings.CurrentVersion,
		),
		TargetVersion:    DefaultSettings.CurrentVersion,
		TargetK8sVersion: currentK8sVersion,

		MasterNodes:     1,
		WorkerNodes:     0,
		UpgradePreserve: true,
	}
}

// upgradeStableToCurrentPreserveStage upgrades from the stable Talos release to the current version on a single-node cluster with preserve and staged upgrades enabled.
func upgradeStableToCurrentPreserveStage() upgradeSpec {
	return upgradeSpec{
		ShortName: fmt.Sprintf("prsrv-stg-%s-%s", stableRelease, DefaultSettings.CurrentVersion),

		SourceKernelPath:     helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
		SourceInitramfsPath:  helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
		SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
		SourceVersion:        stableRelease,
		SourceK8sVersion:     stableK8sVersion,

		TargetInstallerImage: fmt.Sprintf(
			"%s/%s:%s",
			DefaultSettings.TargetInstallImageRegistry,
			images.DefaultInstallerImageName,
			DefaultSettings.CurrentVersion,
		),
		TargetVersion:    DefaultSettings.CurrentVersion,
		TargetK8sVersion: currentK8sVersion,

		MasterNodes:     1,
		WorkerNodes:     0,
		UpgradePreserve: true,
		UpgradeStage:    true,
	}
}

// UpgradeSuite ...
type UpgradeSuite struct {
	suite.Suite
	base.TalosSuite

	specGen func() upgradeSpec
	spec    upgradeSpec

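	// track is the test track this suite runs on; init below spreads the suites
	// across tracks 0-2, and the track number becomes part of the suite name.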
	track int

	provisioner provision.Provisioner

	configBundle *bundle.Bundle

	clusterAccess        *access.Adapter
	controlPlaneEndpoint string

	//nolint:containedctx
	ctx       context.Context
	ctxCancel context.CancelFunc

	stateDir string
	cniDir   string
}

// SetupSuite ...
func (suite *UpgradeSuite) SetupSuite() {
	// call generate late in the flow, as it needs to pick up settings overridden by the test runner
	suite.spec = suite.specGen()

	suite.T().Logf("upgrade spec = %v", suite.spec)

	// timeout for the whole test
	suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Minute)

	var err error

	suite.provisioner, err = qemu.NewProvisioner(suite.ctx)
	suite.Require().NoError(err)
}

// TearDownSuite ...
func (suite *UpgradeSuite) TearDownSuite() {
	if suite.T().Failed() && DefaultSettings.CrashdumpEnabled && suite.Cluster != nil {
		// for failed tests, produce a crash dump for easier debugging,
		// as the cluster is going to be torn down below
		suite.provisioner.CrashDump(suite.ctx, suite.Cluster, os.Stderr)

		if suite.clusterAccess != nil {
			suite.clusterAccess.CrashDump(suite.ctx, os.Stderr)
		}
	}

	if suite.clusterAccess != nil {
		suite.Assert().NoError(suite.clusterAccess.Close())
	}

	if suite.Cluster != nil {
		suite.Assert().NoError(suite.provisioner.Destroy(suite.ctx, suite.Cluster))
	}

	suite.ctxCancel()

	if suite.stateDir != "" {
		suite.Assert().NoError(os.RemoveAll(suite.stateDir))
	}

	if suite.provisioner != nil {
		suite.Assert().NoError(suite.provisioner.Close())
	}
}

// setupCluster provisions the source cluster and waits for it to become healthy.
func (suite *UpgradeSuite) setupCluster() {
	defaultStateDir, err := clientconfig.GetTalosDirectory()
	suite.Require().NoError(err)

	suite.stateDir = filepath.Join(defaultStateDir, "clusters")
	suite.cniDir = filepath.Join(defaultStateDir, "cni")

	clusterName := suite.spec.ShortName

	cidr, err := netip.ParsePrefix(DefaultSettings.CIDR)
	suite.Require().NoError(err)

	var gatewayIP netip.Addr

	gatewayIP, err = sideronet.NthIPInNetwork(cidr, 1)
	suite.Require().NoError(err)

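	// the gateway takes the first address in the network; node addresses are allocated right after it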
	ips := make([]netip.Addr, suite.spec.MasterNodes+suite.spec.WorkerNodes)

	for i := range ips {
		ips[i], err = sideronet.NthIPInNetwork(cidr, i+2)
		suite.Require().NoError(err)
	}

	suite.T().Logf("initializing provisioner with cluster name %q, state directory %q", clusterName, suite.stateDir)

	request := provision.ClusterRequest{
		Name: clusterName,

		Network: provision.NetworkRequest{
			Name:         clusterName,
			CIDRs:        []netip.Prefix{cidr},
			GatewayAddrs: []netip.Addr{gatewayIP},
			MTU:          DefaultSettings.MTU,
			Nameservers:  defaultNameservers,
			CNI: provision.CNIConfig{
				BinPath:  []string{filepath.Join(suite.cniDir, "bin")},
				ConfDir:  filepath.Join(suite.cniDir, "conf.d"),
				CacheDir: filepath.Join(suite.cniDir, "cache"),

				BundleURL: DefaultSettings.CNIBundleURL,
			},
		},

		KernelPath:    suite.spec.SourceKernelPath,
		InitramfsPath: suite.spec.SourceInitramfsPath,

		SelfExecutable: suite.TalosctlPath,
		StateDirectory: suite.stateDir,
	}

	defaultInternalLB, _ := suite.provisioner.GetLoadBalancers(request.Network)
	suite.controlPlaneEndpoint = fmt.Sprintf("https://%s", nethelpers.JoinHostPort(defaultInternalLB, constants.DefaultControlPlanePort))

	genOptions := suite.provisioner.GenOptions(request.Network)

	for _, registryMirror := range DefaultSettings.RegistryMirrors {
		parts := strings.SplitN(registryMirror, "=", 2)
		suite.Require().Len(parts, 2)

		genOptions = append(genOptions, generate.WithRegistryMirror(parts[0], parts[1]))
	}

	masterEndpoints := make([]string, suite.spec.MasterNodes)
	for i := range masterEndpoints {
		masterEndpoints[i] = ips[i].String()
	}

	if DefaultSettings.CustomCNIURL != "" {
		genOptions = append(
			genOptions, generate.WithClusterCNIConfig(
				&v1alpha1.CNIConfig{
					CNIName: constants.CustomCNI,
					CNIUrls: []string{DefaultSettings.CustomCNIURL},
				},
			),
		)
	}

	if suite.spec.WithEncryption {
		genOptions = append(
			genOptions, generate.WithSystemDiskEncryption(
				&v1alpha1.SystemDiskEncryptionConfig{
					StatePartition: &v1alpha1.EncryptionConfig{
						EncryptionProvider: encryption.LUKS2,
						EncryptionKeys: []*v1alpha1.EncryptionKey{
							{
								KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
								KeySlot:   0,
							},
						},
					},
					EphemeralPartition: &v1alpha1.EncryptionConfig{
						EncryptionProvider: encryption.LUKS2,
						EncryptionKeys: []*v1alpha1.EncryptionKey{
							{
								KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
								KeySlot:   0,
							},
						},
					},
				},
			),
		)
	}

	versionContract, err := config.ParseContractFromVersion(suite.spec.SourceVersion)
	suite.Require().NoError(err)

	suite.configBundle, err = bundle.NewBundle(
		bundle.WithInputOptions(
			&bundle.InputOptions{
				ClusterName: clusterName,
				Endpoint:    suite.controlPlaneEndpoint,
				KubeVersion: suite.spec.SourceK8sVersion,
				GenOptions: append(
					genOptions,
					generate.WithEndpointList(masterEndpoints),
					generate.WithInstallImage(suite.spec.SourceInstallerImage),
					generate.WithDNSDomain("cluster.local"),
					generate.WithVersionContract(versionContract),
				),
			},
		),
	)
	suite.Require().NoError(err)

	for i := 0; i < suite.spec.MasterNodes; i++ {
		request.Nodes = append(
			request.Nodes,
			provision.NodeRequest{
				Name:     fmt.Sprintf("master-%d", i+1),
				Type:     machine.TypeControlPlane,
				IPs:      []netip.Addr{ips[i]},
				Memory:   DefaultSettings.MemMB * 1024 * 1024,
				NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
				Disks: []*provision.Disk{
					{
						Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
					},
				},
				Config: suite.configBundle.ControlPlane(),
			},
		)
	}

	for i := 1; i <= suite.spec.WorkerNodes; i++ {
		request.Nodes = append(
			request.Nodes,
			provision.NodeRequest{
				Name:     fmt.Sprintf("worker-%d", i),
				Type:     machine.TypeWorker,
				IPs:      []netip.Addr{ips[suite.spec.MasterNodes+i-1]},
				Memory:   DefaultSettings.MemMB * 1024 * 1024,
				NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
				Disks: []*provision.Disk{
					{
						Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
					},
				},
				Config: suite.configBundle.Worker(),
			},
		)
	}

	suite.Cluster, err = suite.provisioner.Create(
		suite.ctx, request,
		provision.WithBootlader(true),
		provision.WithUEFI(true),
		provision.WithTalosConfig(suite.configBundle.TalosConfig()),
	)
	suite.Require().NoError(err)

	c, err := clientconfig.Open("")
	suite.Require().NoError(err)

	c.Merge(suite.configBundle.TalosConfig())

	suite.Require().NoError(c.Save(""))

	suite.clusterAccess = access.NewAdapter(suite.Cluster, provision.WithTalosConfig(suite.configBundle.TalosConfig()))

	suite.Require().NoError(suite.clusterAccess.Bootstrap(suite.ctx, os.Stdout))

	suite.waitForClusterHealth()
}

// waitForClusterHealth asserts cluster health after any change.
func (suite *UpgradeSuite) waitForClusterHealth() {
	runs := 1

	singleNodeCluster := len(suite.Cluster.Info().Nodes) == 1
	if singleNodeCluster {
		// run the health check several times for single-node clusters,
		// as the self-hosted control plane is not stable right after reboot
		runs = 3
	}

	for run := 0; run < runs; run++ {
		if run > 0 {
			time.Sleep(15 * time.Second)
		}

		checkCtx, checkCtxCancel := context.WithTimeout(suite.ctx, 15*time.Minute)
		defer checkCtxCancel()

		suite.Require().NoError(
			check.Wait(
				checkCtx,
				suite.clusterAccess,
				check.DefaultClusterChecks(),
				check.StderrReporter(),
			),
		)
	}
}

// runE2E runs the e2e test against the cluster.
func (suite *UpgradeSuite) runE2E(k8sVersion string) {
	if suite.spec.WorkerNodes == 0 {
		// no worker nodes, so make the control plane node schedulable
		suite.untaint("master-1")
	}

	options := sonobuoy.DefaultOptions()
	options.KubernetesVersion = k8sVersion

	suite.Assert().NoError(sonobuoy.Run(suite.ctx, suite.clusterAccess, options))
}

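// assertSameVersionCluster verifies that every node in the cluster reports the expected Talos version.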
func (suite *UpgradeSuite) assertSameVersionCluster(client *talosclient.Client, expectedVersion string) {
	nodes := slices.Map(suite.Cluster.Info().Nodes, func(node provision.NodeInfo) string { return node.IPs[0].String() })
	ctx := talosclient.WithNodes(suite.ctx, nodes...)

	var v *machineapi.VersionResponse

	err := retry.Constant(
		time.Minute,
	).Retry(
		func() error {
			var e error
			v, e = client.Version(ctx)

			return retry.ExpectedError(e)
		},
	)

	suite.Require().NoError(err)

	suite.Require().Len(v.Messages, len(nodes))

	for _, version := range v.Messages {
		suite.Assert().Equal(expectedVersion, version.Version.Tag)
	}
}

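// readVersion returns the Talos version tag reported by the node targeted by nodeCtx.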
func (suite *UpgradeSuite) readVersion(nodeCtx context.Context, client *talosclient.Client) (
	version string,
	err error,
) {
	var v *machineapi.VersionResponse

	v, err = client.Version(nodeCtx)
	if err != nil {
		return
	}

	version = v.Messages[0].Version.Tag

	return
}

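// upgradeNode issues the upgrade API call against a single node, watches the upgrade
// via the events API so that a failure aborts the test immediately, and then waits
// for the node to come back up on the target version.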
//nolint:gocyclo
func (suite *UpgradeSuite) upgradeNode(client *talosclient.Client, node provision.NodeInfo) {
	suite.T().Logf("upgrading node %s", node.IPs[0])

	ctx, cancel := context.WithCancel(suite.ctx)
	defer cancel()

	nodeCtx := talosclient.WithNodes(ctx, node.IPs[0].String())

	var (
		resp *machineapi.UpgradeResponse
		err  error
	)

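	// retry the upgrade call: transient errors such as an etcd leader change or a
	// not-yet-released upgrade lock are expected and safe to retry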
	err = retry.Constant(time.Minute, retry.WithUnits(10*time.Second)).Retry(
		func() error {
			resp, err = client.Upgrade(
				nodeCtx,
				suite.spec.TargetInstallerImage,
				suite.spec.UpgradePreserve,
				suite.spec.UpgradeStage,
				false,
			)
			if err != nil {
				if strings.Contains(err.Error(), "leader changed") {
					return retry.ExpectedError(err)
				}

				if strings.Contains(err.Error(), "failed to acquire upgrade lock") {
					return retry.ExpectedError(err)
				}

				return err
			}

			return nil
		},
	)

	suite.Require().NoError(err)
	suite.Require().Equal("Upgrade request received", resp.Messages[0].Ack)

	actorID := resp.Messages[0].ActorId

	eventCh := make(chan talosclient.EventResult)

	// watch for events tagged with the actor ID of this upgrade request
	suite.Require().NoError(client.EventsWatchV2(nodeCtx, eventCh, talosclient.WithActorID(actorID), talosclient.WithTailEvents(-1)))

	waitTimer := time.NewTimer(5 * time.Minute)
	defer waitTimer.Stop()

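	// consume events until the upgrade either fails (fail the test immediately) or
	// reaches the kexec phase, which means the node is about to reboot into the new version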
waitLoop:
	for {
		select {
		case ev := <-eventCh:
			suite.Require().NoError(ev.Error)

			switch msg := ev.Event.Payload.(type) {
			case *machineapi.SequenceEvent:
				if msg.Error != nil {
					suite.FailNow("upgrade failed", "%s: %s", msg.Error.Message, msg.Error.Code)
				}
			case *machineapi.PhaseEvent:
				if msg.Action == machineapi.PhaseEvent_START && msg.Phase == "kexec" {
					// about to be rebooted
					break waitLoop
				}

				if msg.Action == machineapi.PhaseEvent_STOP {
					suite.T().Logf("upgrade phase %q finished", msg.Phase)
				}
			}
		case <-waitTimer.C:
			suite.FailNow("timeout waiting for upgrade to finish")
		case <-ctx.Done():
			suite.FailNow("context canceled")
		}
	}

	// wait for apid to be shut down
	time.Sleep(10 * time.Second)

	// wait for the node to report the target version
	suite.Require().NoError(
		retry.Constant(10 * time.Minute).Retry(
			func() error {
				var version string

				version, err = suite.readVersion(nodeCtx, client)
				if err != nil {
					// API might be unresponsive during the upgrade
					return retry.ExpectedError(err)
				}

				if version != suite.spec.TargetVersion {
					// upgrade not finished yet
					return retry.ExpectedError(
						fmt.Errorf(
							"node %q version doesn't match expected: expected %q, got %q",
							node.IPs[0].String(),
							suite.spec.TargetVersion,
							version,
						),
					)
				}

				return nil
			},
		),
	)

	suite.waitForClusterHealth()
}

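// upgradeKubernetes upgrades the Kubernetes control plane components (and optionally the kubelet) between the given versions.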
func (suite *UpgradeSuite) upgradeKubernetes(fromVersion, toVersion string, skipKubeletUpgrade bool) {
	if fromVersion == toVersion {
		suite.T().Logf("skipping Kubernetes upgrade, as versions are equal %q -> %q", fromVersion, toVersion)

		return
	}

	suite.T().Logf("upgrading Kubernetes: %q -> %q", fromVersion, toVersion)

	path, err := upgrade.NewPath(fromVersion, toVersion)
	suite.Require().NoError(err)

	options := kubernetes.UpgradeOptions{
		Path: path,

		ControlPlaneEndpoint: suite.controlPlaneEndpoint,

		UpgradeKubelet: !skipKubeletUpgrade,
		PrePullImages:  true,
	}

	suite.Require().NoError(kubernetes.Upgrade(suite.ctx, suite.clusterAccess, options))
}

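// untaint removes the control-plane taint from the given node, making it schedulable for regular workloads.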
func (suite *UpgradeSuite) untaint(name string) {
	client, err := suite.clusterAccess.K8sClient(suite.ctx)
	suite.Require().NoError(err)

	n, err := client.CoreV1().Nodes().Get(suite.ctx, name, metav1.GetOptions{})
	suite.Require().NoError(err)

	oldData, err := json.Marshal(n)
	suite.Require().NoError(err)

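	// filter out the control-plane taint in place, keeping all other taints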
	k := 0

	for _, taint := range n.Spec.Taints {
		if taint.Key != constants.LabelNodeRoleControlPlane {
			n.Spec.Taints[k] = taint
			k++
		}
	}

	n.Spec.Taints = n.Spec.Taints[:k]

	newData, err := json.Marshal(n)
	suite.Require().NoError(err)

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
	suite.Require().NoError(err)

	_, err = client.CoreV1().Nodes().Patch(
		suite.ctx,
		n.Name,
		types.StrategicMergePatchType,
		patchBytes,
		metav1.PatchOptions{},
	)
	suite.Require().NoError(err)
}

// TestRolling performs a rolling upgrade of the cluster, starting with the control plane (master) nodes.
func (suite *UpgradeSuite) TestRolling() {
	suite.setupCluster()

	client, err := suite.clusterAccess.Client()
	suite.Require().NoError(err)

	// verify initial cluster version
	suite.assertSameVersionCluster(client, suite.spec.SourceVersion)

	// upgrade master nodes
	for _, node := range suite.Cluster.Info().Nodes {
		if node.Type == machine.TypeInit || node.Type == machine.TypeControlPlane {
			suite.upgradeNode(client, node)
		}
	}

	// upgrade worker nodes
	for _, node := range suite.Cluster.Info().Nodes {
		if node.Type == machine.TypeWorker {
			suite.upgradeNode(client, node)
		}
	}

	// verify final cluster version
	suite.assertSameVersionCluster(client, suite.spec.TargetVersion)

	// upgrade Kubernetes if required
	suite.upgradeKubernetes(suite.spec.SourceK8sVersion, suite.spec.TargetK8sVersion, suite.spec.SkipKubeletUpgrade)

	// run the e2e test
	suite.runE2E(suite.spec.TargetK8sVersion)
}

// SuiteName ...
func (suite *UpgradeSuite) SuiteName() string {
	if suite.spec.ShortName == "" {
		suite.spec = suite.specGen()
	}

	return fmt.Sprintf("provision.UpgradeSuite.%s-TR%d", suite.spec.ShortName, suite.track)
}

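// init registers the upgrade suites; the track numbers spread the suites across
// test tracks, which (presumably) lets the test runner execute them in parallel runs.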
func init() {
	allSuites = append(
		allSuites,
		&UpgradeSuite{specGen: upgradePreviousToStable, track: 0},
		&UpgradeSuite{specGen: upgradeStableToCurrent, track: 1},
		&UpgradeSuite{specGen: upgradeCurrentToCurrent, track: 2},
		&UpgradeSuite{specGen: upgradeStableToCurrentPreserve, track: 0},
		&UpgradeSuite{specGen: upgradeStableToCurrentPreserveStage, track: 1},
	)
}