mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-13 00:21:12 +02:00
Enabling BTF in the kernel breaks kexec from pre-BTF kernel (e.g. when upgrading from 1.2.x to 1.3.x). As there's no way to detect Talos version in the installer at the moment, use another way to detect whether BTF is enabled in the Talos version which is running right now. Fixes #6443 Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
755 lines
22 KiB
Go
755 lines
22 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
//go:build integration_provision
|
|
|
|
package provision
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/netip"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/siderolabs/gen/slices"
|
|
"github.com/siderolabs/go-blockdevice/blockdevice/encryption"
|
|
"github.com/siderolabs/go-retry/retry"
|
|
sideronet "github.com/siderolabs/net"
|
|
"github.com/stretchr/testify/suite"
|
|
corev1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/strategicpatch"
|
|
|
|
"github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers"
|
|
"github.com/siderolabs/talos/internal/integration/base"
|
|
"github.com/siderolabs/talos/pkg/cluster/check"
|
|
"github.com/siderolabs/talos/pkg/cluster/kubernetes"
|
|
"github.com/siderolabs/talos/pkg/cluster/sonobuoy"
|
|
"github.com/siderolabs/talos/pkg/images"
|
|
machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
|
|
talosclient "github.com/siderolabs/talos/pkg/machinery/client"
|
|
clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
|
|
"github.com/siderolabs/talos/pkg/machinery/config"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/bundle"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/generate"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/machine"
|
|
"github.com/siderolabs/talos/pkg/machinery/constants"
|
|
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
|
|
"github.com/siderolabs/talos/pkg/provision"
|
|
"github.com/siderolabs/talos/pkg/provision/access"
|
|
"github.com/siderolabs/talos/pkg/provision/providers/qemu"
|
|
)
|
|
|
|
// upgradeSpec describes one upgrade scenario: the release the cluster is
// provisioned with, the release it is upgraded to, and the shape of the cluster.
//
//nolint:maligned
type upgradeSpec struct {
	// ShortName is used as the cluster name and as part of the suite name.
	ShortName string

	// Source* fields describe the release the cluster is initially provisioned with.
	SourceKernelPath     string
	SourceInitramfsPath  string
	SourceInstallerImage string
	SourceVersion        string
	SourceK8sVersion     string

	// Target* fields describe the release the cluster is upgraded to.
	TargetInstallerImage string
	TargetVersion        string
	TargetK8sVersion     string

	// SkipKubeletUpgrade disables the kubelet upgrade during the Kubernetes upgrade step.
	SkipKubeletUpgrade bool

	// MasterNodes and WorkerNodes set the number of nodes of each kind to provision.
	MasterNodes int
	WorkerNodes int

	// UpgradePreserve and UpgradeStage are passed through to the Talos upgrade API call.
	UpgradePreserve bool
	UpgradeStage    bool
	// WithEncryption enables system disk encryption in the generated machine config.
	WithEncryption bool
}
|
|
|
|
const (
|
|
// These versions should be kept in sync with Makefile variable RELEASES.
|
|
previousRelease = "v1.1.2"
|
|
stableRelease = "v1.2.6" // or soon-to-be-stable
|
|
// The current version (the one being built on CI) is DefaultSettings.CurrentVersion.
|
|
|
|
// Command to find Kubernetes version for past releases:
|
|
//
|
|
// git show ${TAG}:pkg/machinery/constants/constants.go | grep KubernetesVersion
|
|
previousK8sVersion = "1.24.3" // constants.DefaultKubernetesVersion in the previousRelease
|
|
stableK8sVersion = "1.25.3" // constants.DefaultKubernetesVersion in the stableRelease
|
|
currentK8sVersion = constants.DefaultKubernetesVersion
|
|
)
|
|
|
|
// defaultNameservers lists the DNS servers configured on the provisioned cluster network.
var defaultNameservers = []netip.Addr{
	netip.MustParseAddr("8.8.8.8"),
	netip.MustParseAddr("1.1.1.1"),
}
|
|
|
|
// upgradePreviousToStable upgrades from the previous Talos release to the stable release.
|
|
func upgradePreviousToStable() upgradeSpec {
|
|
return upgradeSpec{
|
|
ShortName: fmt.Sprintf("%s-%s", previousRelease, stableRelease),
|
|
|
|
SourceKernelPath: helpers.ArtifactPath(filepath.Join(trimVersion(previousRelease), constants.KernelAsset)),
|
|
SourceInitramfsPath: helpers.ArtifactPath(
|
|
filepath.Join(
|
|
trimVersion(previousRelease),
|
|
constants.InitramfsAsset,
|
|
),
|
|
),
|
|
SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", previousRelease),
|
|
SourceVersion: previousRelease,
|
|
SourceK8sVersion: previousK8sVersion,
|
|
|
|
TargetInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
|
|
TargetVersion: stableRelease,
|
|
TargetK8sVersion: stableK8sVersion,
|
|
|
|
MasterNodes: DefaultSettings.MasterNodes,
|
|
WorkerNodes: DefaultSettings.WorkerNodes,
|
|
}
|
|
}
|
|
|
|
// upgradeStableToCurrent upgrades from the stable Talos release to the current version.
|
|
func upgradeStableToCurrent() upgradeSpec {
|
|
return upgradeSpec{
|
|
ShortName: fmt.Sprintf("%s-%s", stableRelease, DefaultSettings.CurrentVersion),
|
|
|
|
SourceKernelPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
|
|
SourceInitramfsPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
|
|
SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
|
|
SourceVersion: stableRelease,
|
|
SourceK8sVersion: stableK8sVersion,
|
|
|
|
TargetInstallerImage: fmt.Sprintf(
|
|
"%s/%s:%s",
|
|
DefaultSettings.TargetInstallImageRegistry,
|
|
images.DefaultInstallerImageName,
|
|
DefaultSettings.CurrentVersion,
|
|
),
|
|
TargetVersion: DefaultSettings.CurrentVersion,
|
|
TargetK8sVersion: currentK8sVersion,
|
|
|
|
MasterNodes: DefaultSettings.MasterNodes,
|
|
WorkerNodes: DefaultSettings.WorkerNodes,
|
|
}
|
|
}
|
|
|
|
// upgradeCurrentToCurrent upgrades the current version to itself.
|
|
func upgradeCurrentToCurrent() upgradeSpec {
|
|
installerImage := fmt.Sprintf(
|
|
"%s/%s:%s",
|
|
DefaultSettings.TargetInstallImageRegistry,
|
|
images.DefaultInstallerImageName,
|
|
DefaultSettings.CurrentVersion,
|
|
)
|
|
|
|
return upgradeSpec{
|
|
ShortName: fmt.Sprintf("%s-%s", DefaultSettings.CurrentVersion, DefaultSettings.CurrentVersion),
|
|
|
|
SourceKernelPath: helpers.ArtifactPath(constants.KernelAssetWithArch),
|
|
SourceInitramfsPath: helpers.ArtifactPath(constants.InitramfsAssetWithArch),
|
|
SourceInstallerImage: installerImage,
|
|
SourceVersion: DefaultSettings.CurrentVersion,
|
|
SourceK8sVersion: currentK8sVersion,
|
|
|
|
TargetInstallerImage: installerImage,
|
|
TargetVersion: DefaultSettings.CurrentVersion,
|
|
TargetK8sVersion: currentK8sVersion,
|
|
|
|
MasterNodes: DefaultSettings.MasterNodes,
|
|
WorkerNodes: DefaultSettings.WorkerNodes,
|
|
|
|
WithEncryption: true,
|
|
}
|
|
}
|
|
|
|
// upgradeStableToCurrentPreserve upgrades from the stable Talos release to the current version for single-node cluster with preserve.
|
|
func upgradeStableToCurrentPreserve() upgradeSpec {
|
|
return upgradeSpec{
|
|
ShortName: fmt.Sprintf("prsrv-%s-%s", stableRelease, DefaultSettings.CurrentVersion),
|
|
|
|
SourceKernelPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
|
|
SourceInitramfsPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
|
|
SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
|
|
SourceVersion: stableRelease,
|
|
SourceK8sVersion: stableK8sVersion,
|
|
|
|
TargetInstallerImage: fmt.Sprintf(
|
|
"%s/%s:%s",
|
|
DefaultSettings.TargetInstallImageRegistry,
|
|
images.DefaultInstallerImageName,
|
|
DefaultSettings.CurrentVersion,
|
|
),
|
|
TargetVersion: DefaultSettings.CurrentVersion,
|
|
TargetK8sVersion: currentK8sVersion,
|
|
|
|
MasterNodes: 1,
|
|
WorkerNodes: 0,
|
|
UpgradePreserve: true,
|
|
}
|
|
}
|
|
|
|
// upgradeStableToCurrentPreserveStage upgrades from the stable Talos release to the current version for single-node cluster with preserve and stage.
|
|
func upgradeStableToCurrentPreserveStage() upgradeSpec {
|
|
return upgradeSpec{
|
|
ShortName: fmt.Sprintf("prsrv-stg-%s-%s", stableRelease, DefaultSettings.CurrentVersion),
|
|
|
|
SourceKernelPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.KernelAsset)),
|
|
SourceInitramfsPath: helpers.ArtifactPath(filepath.Join(trimVersion(stableRelease), constants.InitramfsAsset)),
|
|
SourceInstallerImage: fmt.Sprintf("%s:%s", "ghcr.io/siderolabs/installer", stableRelease),
|
|
SourceVersion: stableRelease,
|
|
SourceK8sVersion: stableK8sVersion,
|
|
|
|
TargetInstallerImage: fmt.Sprintf(
|
|
"%s/%s:%s",
|
|
DefaultSettings.TargetInstallImageRegistry,
|
|
images.DefaultInstallerImageName,
|
|
DefaultSettings.CurrentVersion,
|
|
),
|
|
TargetVersion: DefaultSettings.CurrentVersion,
|
|
TargetK8sVersion: currentK8sVersion,
|
|
|
|
MasterNodes: 1,
|
|
WorkerNodes: 0,
|
|
UpgradePreserve: true,
|
|
UpgradeStage: true,
|
|
}
|
|
}
|
|
|
|
// UpgradeSuite ...
|
|
type UpgradeSuite struct {
|
|
suite.Suite
|
|
base.TalosSuite
|
|
|
|
specGen func() upgradeSpec
|
|
spec upgradeSpec
|
|
|
|
track int
|
|
|
|
provisioner provision.Provisioner
|
|
|
|
configBundle *bundle.ConfigBundle
|
|
|
|
clusterAccess *access.Adapter
|
|
controlPlaneEndpoint string
|
|
|
|
//nolint:containedctx
|
|
ctx context.Context
|
|
ctxCancel context.CancelFunc
|
|
|
|
stateDir string
|
|
cniDir string
|
|
}
|
|
|
|
// SetupSuite ...
|
|
func (suite *UpgradeSuite) SetupSuite() {
|
|
// call generate late in the flow, as it needs to pick up settings overridden by test runner
|
|
suite.spec = suite.specGen()
|
|
|
|
suite.T().Logf("upgrade spec = %v", suite.spec)
|
|
|
|
// timeout for the whole test
|
|
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Minute)
|
|
|
|
var err error
|
|
|
|
suite.provisioner, err = qemu.NewProvisioner(suite.ctx)
|
|
suite.Require().NoError(err)
|
|
}
|
|
|
|
// TearDownSuite ...
|
|
func (suite *UpgradeSuite) TearDownSuite() {
|
|
if suite.T().Failed() && DefaultSettings.CrashdumpEnabled && suite.Cluster != nil {
|
|
// for failed tests, produce crash dump for easier debugging,
|
|
// as cluster is going to be torn down below
|
|
suite.provisioner.CrashDump(suite.ctx, suite.Cluster, os.Stderr)
|
|
|
|
if suite.clusterAccess != nil {
|
|
suite.clusterAccess.CrashDump(suite.ctx, os.Stderr)
|
|
}
|
|
}
|
|
|
|
if suite.clusterAccess != nil {
|
|
suite.Assert().NoError(suite.clusterAccess.Close())
|
|
}
|
|
|
|
if suite.Cluster != nil {
|
|
suite.Assert().NoError(suite.provisioner.Destroy(suite.ctx, suite.Cluster))
|
|
}
|
|
|
|
suite.ctxCancel()
|
|
|
|
if suite.stateDir != "" {
|
|
suite.Assert().NoError(os.RemoveAll(suite.stateDir))
|
|
}
|
|
|
|
if suite.provisioner != nil {
|
|
suite.Assert().NoError(suite.provisioner.Close())
|
|
}
|
|
}
|
|
|
|
// setupCluster provisions source clusters and waits for health.
|
|
func (suite *UpgradeSuite) setupCluster() {
|
|
defaultStateDir, err := clientconfig.GetTalosDirectory()
|
|
suite.Require().NoError(err)
|
|
|
|
suite.stateDir = filepath.Join(defaultStateDir, "clusters")
|
|
suite.cniDir = filepath.Join(defaultStateDir, "cni")
|
|
|
|
clusterName := suite.spec.ShortName
|
|
|
|
cidr, err := netip.ParsePrefix(DefaultSettings.CIDR)
|
|
suite.Require().NoError(err)
|
|
|
|
var gatewayIP netip.Addr
|
|
|
|
gatewayIP, err = sideronet.NthIPInNetwork(cidr, 1)
|
|
suite.Require().NoError(err)
|
|
|
|
ips := make([]netip.Addr, suite.spec.MasterNodes+suite.spec.WorkerNodes)
|
|
|
|
for i := range ips {
|
|
ips[i], err = sideronet.NthIPInNetwork(cidr, i+2)
|
|
suite.Require().NoError(err)
|
|
}
|
|
|
|
suite.T().Logf("initializing provisioner with cluster name %q, state directory %q", clusterName, suite.stateDir)
|
|
|
|
request := provision.ClusterRequest{
|
|
Name: clusterName,
|
|
|
|
Network: provision.NetworkRequest{
|
|
Name: clusterName,
|
|
CIDRs: []netip.Prefix{cidr},
|
|
GatewayAddrs: []netip.Addr{gatewayIP},
|
|
MTU: DefaultSettings.MTU,
|
|
Nameservers: defaultNameservers,
|
|
CNI: provision.CNIConfig{
|
|
BinPath: []string{filepath.Join(suite.cniDir, "bin")},
|
|
ConfDir: filepath.Join(suite.cniDir, "conf.d"),
|
|
CacheDir: filepath.Join(suite.cniDir, "cache"),
|
|
|
|
BundleURL: DefaultSettings.CNIBundleURL,
|
|
},
|
|
},
|
|
|
|
KernelPath: suite.spec.SourceKernelPath,
|
|
InitramfsPath: suite.spec.SourceInitramfsPath,
|
|
|
|
SelfExecutable: suite.TalosctlPath,
|
|
StateDirectory: suite.stateDir,
|
|
}
|
|
|
|
defaultInternalLB, _ := suite.provisioner.GetLoadBalancers(request.Network)
|
|
suite.controlPlaneEndpoint = fmt.Sprintf("https://%s", nethelpers.JoinHostPort(defaultInternalLB, constants.DefaultControlPlanePort))
|
|
|
|
genOptions := suite.provisioner.GenOptions(request.Network)
|
|
|
|
for _, registryMirror := range DefaultSettings.RegistryMirrors {
|
|
parts := strings.SplitN(registryMirror, "=", 2)
|
|
suite.Require().Len(parts, 2)
|
|
|
|
genOptions = append(genOptions, generate.WithRegistryMirror(parts[0], parts[1]))
|
|
}
|
|
|
|
masterEndpoints := make([]string, suite.spec.MasterNodes)
|
|
for i := range masterEndpoints {
|
|
masterEndpoints[i] = ips[i].String()
|
|
}
|
|
|
|
if DefaultSettings.CustomCNIURL != "" {
|
|
genOptions = append(
|
|
genOptions, generate.WithClusterCNIConfig(
|
|
&v1alpha1.CNIConfig{
|
|
CNIName: constants.CustomCNI,
|
|
CNIUrls: []string{DefaultSettings.CustomCNIURL},
|
|
},
|
|
),
|
|
)
|
|
}
|
|
|
|
if suite.spec.WithEncryption {
|
|
genOptions = append(
|
|
genOptions, generate.WithSystemDiskEncryption(
|
|
&v1alpha1.SystemDiskEncryptionConfig{
|
|
StatePartition: &v1alpha1.EncryptionConfig{
|
|
EncryptionProvider: encryption.LUKS2,
|
|
EncryptionKeys: []*v1alpha1.EncryptionKey{
|
|
{
|
|
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
|
|
KeySlot: 0,
|
|
},
|
|
},
|
|
},
|
|
EphemeralPartition: &v1alpha1.EncryptionConfig{
|
|
EncryptionProvider: encryption.LUKS2,
|
|
EncryptionKeys: []*v1alpha1.EncryptionKey{
|
|
{
|
|
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
|
|
KeySlot: 0,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
),
|
|
)
|
|
}
|
|
|
|
versionContract, err := config.ParseContractFromVersion(suite.spec.SourceVersion)
|
|
suite.Require().NoError(err)
|
|
|
|
suite.configBundle, err = bundle.NewConfigBundle(
|
|
bundle.WithInputOptions(
|
|
&bundle.InputOptions{
|
|
ClusterName: clusterName,
|
|
Endpoint: suite.controlPlaneEndpoint,
|
|
KubeVersion: suite.spec.SourceK8sVersion,
|
|
GenOptions: append(
|
|
genOptions,
|
|
generate.WithEndpointList(masterEndpoints),
|
|
generate.WithInstallImage(suite.spec.SourceInstallerImage),
|
|
generate.WithDNSDomain("cluster.local"),
|
|
generate.WithVersionContract(versionContract),
|
|
),
|
|
},
|
|
),
|
|
)
|
|
suite.Require().NoError(err)
|
|
|
|
for i := 0; i < suite.spec.MasterNodes; i++ {
|
|
request.Nodes = append(
|
|
request.Nodes,
|
|
provision.NodeRequest{
|
|
Name: fmt.Sprintf("master-%d", i+1),
|
|
Type: machine.TypeControlPlane,
|
|
IPs: []netip.Addr{ips[i]},
|
|
Memory: DefaultSettings.MemMB * 1024 * 1024,
|
|
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
|
|
Disks: []*provision.Disk{
|
|
{
|
|
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
|
|
},
|
|
},
|
|
Config: suite.configBundle.ControlPlane(),
|
|
},
|
|
)
|
|
}
|
|
|
|
for i := 1; i <= suite.spec.WorkerNodes; i++ {
|
|
request.Nodes = append(
|
|
request.Nodes,
|
|
provision.NodeRequest{
|
|
Name: fmt.Sprintf("worker-%d", i),
|
|
Type: machine.TypeWorker,
|
|
IPs: []netip.Addr{ips[suite.spec.MasterNodes+i-1]},
|
|
Memory: DefaultSettings.MemMB * 1024 * 1024,
|
|
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
|
|
Disks: []*provision.Disk{
|
|
{
|
|
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
|
|
},
|
|
},
|
|
Config: suite.configBundle.Worker(),
|
|
},
|
|
)
|
|
}
|
|
|
|
suite.Cluster, err = suite.provisioner.Create(
|
|
suite.ctx, request,
|
|
provision.WithBootlader(true),
|
|
provision.WithUEFI(true),
|
|
provision.WithTalosConfig(suite.configBundle.TalosConfig()),
|
|
)
|
|
suite.Require().NoError(err)
|
|
|
|
c, err := clientconfig.Open("")
|
|
suite.Require().NoError(err)
|
|
|
|
c.Merge(suite.configBundle.TalosConfig())
|
|
|
|
suite.Require().NoError(c.Save(""))
|
|
|
|
suite.clusterAccess = access.NewAdapter(suite.Cluster, provision.WithTalosConfig(suite.configBundle.TalosConfig()))
|
|
|
|
suite.Require().NoError(suite.clusterAccess.Bootstrap(suite.ctx, os.Stdout))
|
|
|
|
suite.waitForClusterHealth()
|
|
}
|
|
|
|
// waitForClusterHealth asserts cluster health after any change.
|
|
func (suite *UpgradeSuite) waitForClusterHealth() {
|
|
runs := 1
|
|
|
|
singleNodeCluster := len(suite.Cluster.Info().Nodes) == 1
|
|
if singleNodeCluster {
|
|
// run health check several times for single node clusters,
|
|
// as self-hosted control plane is not stable after reboot
|
|
runs = 3
|
|
}
|
|
|
|
for run := 0; run < runs; run++ {
|
|
if run > 0 {
|
|
time.Sleep(15 * time.Second)
|
|
}
|
|
|
|
checkCtx, checkCtxCancel := context.WithTimeout(suite.ctx, 15*time.Minute)
|
|
defer checkCtxCancel()
|
|
|
|
suite.Require().NoError(
|
|
check.Wait(
|
|
checkCtx,
|
|
suite.clusterAccess,
|
|
check.DefaultClusterChecks(),
|
|
check.StderrReporter(),
|
|
),
|
|
)
|
|
}
|
|
}
|
|
|
|
// runE2E runs e2e test on the cluster.
|
|
func (suite *UpgradeSuite) runE2E(k8sVersion string) {
|
|
if suite.spec.WorkerNodes == 0 {
|
|
// no worker nodes, should make masters schedulable
|
|
suite.untaint("master-1")
|
|
}
|
|
|
|
options := sonobuoy.DefaultOptions()
|
|
options.KubernetesVersion = k8sVersion
|
|
|
|
suite.Assert().NoError(sonobuoy.Run(suite.ctx, suite.clusterAccess, options))
|
|
}
|
|
|
|
func (suite *UpgradeSuite) assertSameVersionCluster(client *talosclient.Client, expectedVersion string) {
|
|
nodes := slices.Map(suite.Cluster.Info().Nodes, func(node provision.NodeInfo) string { return node.IPs[0].String() })
|
|
ctx := talosclient.WithNodes(suite.ctx, nodes...)
|
|
|
|
var v *machineapi.VersionResponse
|
|
|
|
err := retry.Constant(
|
|
time.Minute,
|
|
).Retry(
|
|
func() error {
|
|
var e error
|
|
v, e = client.Version(ctx)
|
|
|
|
return retry.ExpectedError(e)
|
|
},
|
|
)
|
|
|
|
suite.Require().NoError(err)
|
|
|
|
suite.Require().Len(v.Messages, len(nodes))
|
|
|
|
for _, version := range v.Messages {
|
|
suite.Assert().Equal(expectedVersion, version.Version.Tag)
|
|
}
|
|
}
|
|
|
|
func (suite *UpgradeSuite) readVersion(nodeCtx context.Context, client *talosclient.Client) (
|
|
version string,
|
|
err error,
|
|
) {
|
|
var v *machineapi.VersionResponse
|
|
|
|
v, err = client.Version(nodeCtx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
version = v.Messages[0].Version.Tag
|
|
|
|
return
|
|
}
|
|
|
|
func (suite *UpgradeSuite) upgradeNode(client *talosclient.Client, node provision.NodeInfo) {
|
|
suite.T().Logf("upgrading node %s", node.IPs[0])
|
|
|
|
nodeCtx := talosclient.WithNodes(suite.ctx, node.IPs[0].String())
|
|
|
|
var (
|
|
resp *machineapi.UpgradeResponse
|
|
err error
|
|
)
|
|
|
|
err = retry.Constant(time.Minute, retry.WithUnits(10*time.Second)).Retry(
|
|
func() error {
|
|
resp, err = client.Upgrade(
|
|
nodeCtx,
|
|
suite.spec.TargetInstallerImage,
|
|
suite.spec.UpgradePreserve,
|
|
suite.spec.UpgradeStage,
|
|
false,
|
|
)
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "leader changed") {
|
|
return retry.ExpectedError(err)
|
|
}
|
|
|
|
if strings.Contains(err.Error(), "failed to acquire upgrade lock") {
|
|
return retry.ExpectedError(err)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
},
|
|
)
|
|
|
|
err = base.IgnoreGRPCUnavailable(err)
|
|
suite.Require().NoError(err)
|
|
|
|
if resp != nil {
|
|
suite.Require().Equal("Upgrade request received", resp.Messages[0].Ack)
|
|
}
|
|
|
|
// wait for the upgrade to be kicked off
|
|
time.Sleep(10 * time.Second)
|
|
|
|
// wait for the version to be equal to target version
|
|
suite.Require().NoError(
|
|
retry.Constant(10 * time.Minute).Retry(
|
|
func() error {
|
|
var version string
|
|
|
|
version, err = suite.readVersion(nodeCtx, client)
|
|
if err != nil {
|
|
// API might be unresponsive during upgrade
|
|
return retry.ExpectedError(err)
|
|
}
|
|
|
|
if version != suite.spec.TargetVersion {
|
|
// upgrade not finished yet
|
|
return retry.ExpectedError(
|
|
fmt.Errorf(
|
|
"node %q version doesn't match expected: expected %q, got %q",
|
|
node.IPs[0].String(),
|
|
suite.spec.TargetVersion,
|
|
version,
|
|
),
|
|
)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
),
|
|
)
|
|
|
|
suite.waitForClusterHealth()
|
|
}
|
|
|
|
func (suite *UpgradeSuite) upgradeKubernetes(fromVersion, toVersion string, skipKubeletUpgrade bool) {
|
|
if fromVersion == toVersion {
|
|
suite.T().Logf("skipping Kubernetes upgrade, as versions are equal %q -> %q", fromVersion, toVersion)
|
|
|
|
return
|
|
}
|
|
|
|
suite.T().Logf("upgrading Kubernetes: %q -> %q", fromVersion, toVersion)
|
|
|
|
options := kubernetes.UpgradeOptions{
|
|
FromVersion: fromVersion,
|
|
ToVersion: toVersion,
|
|
|
|
ControlPlaneEndpoint: suite.controlPlaneEndpoint,
|
|
|
|
UpgradeKubelet: !skipKubeletUpgrade,
|
|
}
|
|
|
|
suite.Require().NoError(kubernetes.UpgradeTalosManaged(suite.ctx, suite.clusterAccess, options))
|
|
}
|
|
|
|
func (suite *UpgradeSuite) untaint(name string) {
|
|
client, err := suite.clusterAccess.K8sClient(suite.ctx)
|
|
suite.Require().NoError(err)
|
|
|
|
n, err := client.CoreV1().Nodes().Get(suite.ctx, name, metav1.GetOptions{})
|
|
suite.Require().NoError(err)
|
|
|
|
oldData, err := json.Marshal(n)
|
|
suite.Require().NoError(err)
|
|
|
|
k := 0
|
|
|
|
for _, taint := range n.Spec.Taints {
|
|
if taint.Key != constants.LabelNodeRoleMaster && taint.Key != constants.LabelNodeRoleControlPlane {
|
|
n.Spec.Taints[k] = taint
|
|
k++
|
|
}
|
|
}
|
|
|
|
n.Spec.Taints = n.Spec.Taints[:k]
|
|
|
|
newData, err := json.Marshal(n)
|
|
suite.Require().NoError(err)
|
|
|
|
patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
|
|
suite.Require().NoError(err)
|
|
|
|
_, err = client.CoreV1().Nodes().Patch(
|
|
suite.ctx,
|
|
n.Name,
|
|
types.StrategicMergePatchType,
|
|
patchBytes,
|
|
metav1.PatchOptions{},
|
|
)
|
|
suite.Require().NoError(err)
|
|
}
|
|
|
|
// TestRolling performs rolling upgrade starting with master nodes.
|
|
func (suite *UpgradeSuite) TestRolling() {
|
|
suite.setupCluster()
|
|
|
|
client, err := suite.clusterAccess.Client()
|
|
suite.Require().NoError(err)
|
|
|
|
// verify initial cluster version
|
|
suite.assertSameVersionCluster(client, suite.spec.SourceVersion)
|
|
|
|
// upgrade master nodes
|
|
for _, node := range suite.Cluster.Info().Nodes {
|
|
if node.Type == machine.TypeInit || node.Type == machine.TypeControlPlane {
|
|
suite.upgradeNode(client, node)
|
|
}
|
|
}
|
|
|
|
// upgrade worker nodes
|
|
for _, node := range suite.Cluster.Info().Nodes {
|
|
if node.Type == machine.TypeWorker {
|
|
suite.upgradeNode(client, node)
|
|
}
|
|
}
|
|
|
|
// verify final cluster version
|
|
suite.assertSameVersionCluster(client, suite.spec.TargetVersion)
|
|
|
|
// upgrade Kubernetes if required
|
|
suite.upgradeKubernetes(suite.spec.SourceK8sVersion, suite.spec.TargetK8sVersion, suite.spec.SkipKubeletUpgrade)
|
|
|
|
// run e2e test
|
|
suite.runE2E(suite.spec.TargetK8sVersion)
|
|
}
|
|
|
|
// SuiteName ...
|
|
func (suite *UpgradeSuite) SuiteName() string {
|
|
if suite.spec.ShortName == "" {
|
|
suite.spec = suite.specGen()
|
|
}
|
|
|
|
return fmt.Sprintf("provision.UpgradeSuite.%s-TR%d", suite.spec.ShortName, suite.track)
|
|
}
|
|
|
|
func init() {
|
|
allSuites = append(
|
|
allSuites,
|
|
&UpgradeSuite{specGen: upgradePreviousToStable, track: 0},
|
|
&UpgradeSuite{specGen: upgradeStableToCurrent, track: 1},
|
|
&UpgradeSuite{specGen: upgradeCurrentToCurrent, track: 2},
|
|
&UpgradeSuite{specGen: upgradeStableToCurrentPreserve, track: 0},
|
|
&UpgradeSuite{specGen: upgradeStableToCurrentPreserveStage, track: 1},
|
|
)
|
|
}
|