From 4da2dd537d5dae884f47bd3f04ddcd05ac6cd222 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 12 Jun 2025 20:47:40 +0400 Subject: [PATCH] feat: enforce Kubernetes version compatibility Fixes #11198 We should enforce this in the following places: * before starting `upgrade-k8s`, check that all Talos machines would end up with a valid version * validate in the Talos machine configuration; this will cover upgrades, new installs, and any manual machine configuration edits Signed-off-by: Andrey Smirnov --- cmd/installer/pkg/install/preflight.go | 161 ------------------ cmd/installer/pkg/install/preflight_test.go | 42 ----- hack/release.toml | 9 + .../provision/k8s_compatibility.go | 25 ++- pkg/cluster/kubernetes/compat.go | 51 ++++++ pkg/cluster/kubernetes/talos_managed.go | 4 + .../types/v1alpha1/v1alpha1_validation.go | 65 +++++++ .../v1alpha1/v1alpha1_validation_test.go | 159 +++++++++++++++++ 8 files changed, 312 insertions(+), 204 deletions(-) delete mode 100644 cmd/installer/pkg/install/preflight_test.go create mode 100644 pkg/cluster/kubernetes/compat.go diff --git a/cmd/installer/pkg/install/preflight.go b/cmd/installer/pkg/install/preflight.go index c164a06e3..9926c2c4b 100644 --- a/cmd/installer/pkg/install/preflight.go +++ b/cmd/installer/pkg/install/preflight.go @@ -9,10 +9,7 @@ import ( "fmt" "log" "os" - "strings" - "github.com/cosi-project/runtime/pkg/safe" - "github.com/cosi-project/runtime/pkg/state" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" @@ -20,7 +17,6 @@ import ( "github.com/siderolabs/talos/pkg/machinery/client" "github.com/siderolabs/talos/pkg/machinery/compatibility" "github.com/siderolabs/talos/pkg/machinery/constants" - "github.com/siderolabs/talos/pkg/machinery/resources/k8s" "github.com/siderolabs/talos/pkg/machinery/role" "github.com/siderolabs/talos/pkg/machinery/version" ) @@ -77,7 +73,6 @@ func (checks *PreflightChecks) Run(ctx context.Context) error { for _, check := range
[]func(context.Context) error{ checks.talosVersion, - checks.kubernetesVersion, } { if err := check(ctx); err != nil { return fmt.Errorf("pre-flight checks failed: %w", err) @@ -112,162 +107,6 @@ func (checks *PreflightChecks) talosVersion(ctx context.Context) error { return checks.installerTalosVersion.UpgradeableFrom(checks.hostTalosVersion) } -type k8sVersions struct { - kubelet *compatibility.KubernetesVersion - apiServer *compatibility.KubernetesVersion - scheduler *compatibility.KubernetesVersion - controllerManager *compatibility.KubernetesVersion -} - -//nolint:gocyclo -func (versions *k8sVersions) gatherVersions(ctx context.Context, client *client.Client) error { - kubeletSpec, err := safe.StateGet[*k8s.KubeletSpec](ctx, client.COSI, k8s.NewKubeletSpec(k8s.NamespaceName, k8s.KubeletID).Metadata()) - if err != nil && !state.IsNotFoundError(err) { - return fmt.Errorf("error getting kubelet spec: %w", err) - } - - if kubeletSpec != nil { - versions.kubelet, err = KubernetesVersionFromImageRef(kubeletSpec.TypedSpec().Image) - if err != nil { - return fmt.Errorf("error parsing kubelet version: %w", err) - } - } - - apiServerSpec, err := safe.StateGet[*k8s.APIServerConfig](ctx, client.COSI, k8s.NewAPIServerConfig().Metadata()) - if err != nil && !state.IsNotFoundError(err) { - return fmt.Errorf("error getting API server spec: %w", err) - } - - if apiServerSpec != nil { - versions.apiServer, err = KubernetesVersionFromImageRef(apiServerSpec.TypedSpec().Image) - if err != nil { - return fmt.Errorf("error parsing API server version: %w", err) - } - } - - schedulerSpec, err := safe.StateGet[*k8s.SchedulerConfig](ctx, client.COSI, k8s.NewSchedulerConfig().Metadata()) - if err != nil && !state.IsNotFoundError(err) { - return fmt.Errorf("error getting scheduler spec: %w", err) - } - - if schedulerSpec != nil { - versions.scheduler, err = KubernetesVersionFromImageRef(schedulerSpec.TypedSpec().Image) - if err != nil { - return fmt.Errorf("error parsing scheduler 
version: %w", err) - } - } - - controllerManagerSpec, err := safe.StateGet[*k8s.ControllerManagerConfig](ctx, client.COSI, k8s.NewControllerManagerConfig().Metadata()) - if err != nil && !state.IsNotFoundError(err) { - return fmt.Errorf("error getting controller manager spec: %w", err) - } - - if controllerManagerSpec != nil { - versions.controllerManager, err = KubernetesVersionFromImageRef(controllerManagerSpec.TypedSpec().Image) - if err != nil { - return fmt.Errorf("error parsing controller manager version: %w", err) - } - } - - return nil -} - -func (versions *k8sVersions) checkCompatibility(target *compatibility.TalosVersion) error { - for _, component := range []struct { - name string - version *compatibility.KubernetesVersion - }{ - { - name: "kubelet", - version: versions.kubelet, - }, - { - name: "kube-apiserver", - version: versions.apiServer, - }, - { - name: "kube-scheduler", - version: versions.scheduler, - }, - { - name: "kube-controller-manager", - version: versions.controllerManager, - }, - } { - if component.version == nil { - continue - } - - if err := component.version.SupportedWith(target); err != nil { - return fmt.Errorf("component %s version issue: %w", component.name, err) - } - } - - return nil -} - -func (versions *k8sVersions) String() string { - var components []string //nolint:prealloc - - for _, component := range []struct { - name string - version *compatibility.KubernetesVersion - }{ - { - name: "kubelet", - version: versions.kubelet, - }, - { - name: "kube-apiserver", - version: versions.apiServer, - }, - { - name: "kube-scheduler", - version: versions.scheduler, - }, - { - name: "kube-controller-manager", - version: versions.controllerManager, - }, - } { - if component.version == nil { - continue - } - - components = append(components, fmt.Sprintf("%s: %s", component.name, component.version)) - } - - return strings.Join(components, ", ") -} - -func (checks *PreflightChecks) kubernetesVersion(ctx context.Context) error { - var 
versions k8sVersions - - if err := versions.gatherVersions(ctx, checks.client); err != nil { - return err - } - - log.Printf("host Kubernetes versions: %s", &versions) - - return versions.checkCompatibility(checks.installerTalosVersion) -} - -// KubernetesVersionFromImageRef parses the Kubernetes version from the image reference. -func KubernetesVersionFromImageRef(ref string) (*compatibility.KubernetesVersion, error) { - idx := strings.LastIndex(ref, ":v") - if idx == -1 { - return nil, fmt.Errorf("invalid image reference: %q", ref) - } - - versionPart := ref[idx+2:] - - if shaIndex := strings.Index(versionPart, "@"); shaIndex != -1 { - versionPart = versionPart[:shaIndex] - } - - return compatibility.ParseKubernetesVersion(versionPart) -} - func unpack[T any](s []T) T { if len(s) != 1 { panic("unpack: slice length is not 1") diff --git a/cmd/installer/pkg/install/preflight_test.go b/cmd/installer/pkg/install/preflight_test.go deleted file mode 100644 index 77dd5d029..000000000 --- a/cmd/installer/pkg/install/preflight_test.go +++ /dev/null @@ -1,42 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -package install_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/siderolabs/talos/cmd/installer/pkg/install" -) - -func TestKubernetesVersionFromImageRef(t *testing.T) { - t.Parallel() - - for _, test := range []struct { - imageRef string - - expectedVersion string - }{ - { - imageRef: "ghcr.io/siderolabs/kubelet:v1.32.2", - expectedVersion: "1.32.2", - }, - { - imageRef: "ghcr.io/siderolabs/kubelet:v1.32.2@sha256:123456", - expectedVersion: "1.32.2", - }, - } { - t.Run(test.imageRef, func(t *testing.T) { - t.Parallel() - - version, err := install.KubernetesVersionFromImageRef(test.imageRef) - require.NoError(t, err) - - assert.Equal(t, test.expectedVersion, version.String()) - }) - } -} diff --git a/hack/release.toml b/hack/release.toml index 088453bf9..f80dce3b2 100644 --- a/hack/release.toml +++ b/hack/release.toml @@ -59,6 +59,15 @@ Talos VMWare platform now supports `arm64` architecture in addition to `amd64`. description = """\ Talos on Azure now defaults to MTU of 1400 bytes for the `eth0` interface to avoid packet fragmentation issues. The default MTU can be overriden with machine configuration. +""" + + [notes.k8s_version] + title = "Kubernetes Version Validation" + description = """\ +Talos now validates the Kubernetes version of the images specified in the machine configuration. +Previously this check was performed only on upgrade, but now it is applied consistently to upgrades, initial provisioning, and machine configuration updates. + +This implies that all image references must contain the version tag (`:vX.Y.Z`), even if the image is pinned by digest.
""" [make_deps] diff --git a/internal/integration/provision/k8s_compatibility.go b/internal/integration/provision/k8s_compatibility.go index c73d3bdb2..3f4a6b833 100644 --- a/internal/integration/provision/k8s_compatibility.go +++ b/internal/integration/provision/k8s_compatibility.go @@ -18,7 +18,9 @@ import ( "github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers" "github.com/siderolabs/talos/pkg/images" + "github.com/siderolabs/talos/pkg/machinery/compatibility" "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/version" ) // K8sCompatibilitySuite ... @@ -58,6 +60,22 @@ func (suite *K8sCompatibilitySuite) SetupSuite() { Patch: 0, } + // while Talos is in alpha stage, DefaultKubernetesVersion might be 1 minor behind the latest alpha Kubernetes version, + // so we need to ensure that minVersion fits into compatibility range + minVersionAdjusted := false + + currentTalosVersion, err := compatibility.ParseTalosVersion(version.NewVersion()) + suite.Require().NoError(err) + + minKubernetesVersion, err := compatibility.ParseKubernetesVersion(minVersion.String()) + suite.Require().NoError(err) + + if minKubernetesVersion.SupportedWith(currentTalosVersion) != nil { + // bump up minVersion to the next minor version + minVersion.Minor++ + minVersionAdjusted = true + } + type versionInfo struct { Major uint64 Minor uint64 @@ -111,7 +129,12 @@ func (suite *K8sCompatibilitySuite) SetupSuite() { suite.T().Logf("using following upgrade sequence: %v", suite.versionsSequence) - suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions) + if minVersionAdjusted { + suite.T().Logf("min Kubernetes version was adjusted to %s to fit Talos compatibility range", minVersion.String()) + suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions-1) + } else { + suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions) + } suite.BaseSuite.SetupSuite() } diff --git 
a/pkg/cluster/kubernetes/compat.go b/pkg/cluster/kubernetes/compat.go new file mode 100644 index 000000000..9af54539f --- /dev/null +++ b/pkg/cluster/kubernetes/compat.go @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package kubernetes + +import ( + "context" + "fmt" + + "golang.org/x/sync/errgroup" + + "github.com/siderolabs/talos/pkg/machinery/client" + "github.com/siderolabs/talos/pkg/machinery/compatibility" +) + +// VerifyVersionCompatibility checks if the given Kubernetes version is compatible with the current Talos version. +func VerifyVersionCompatibility(ctx context.Context, talosClient *client.Client, nodes []string, k8sVersion string, logger func(string, ...any)) error { + eg, ctx := errgroup.WithContext(ctx) + + k8sVersionParsed, err := compatibility.ParseKubernetesVersion(k8sVersion) + if err != nil { + return fmt.Errorf("error parsing Kubernetes version %q: %w", k8sVersion, err) + } + + for _, node := range nodes { + eg.Go(func() error { + nodeCtx := client.WithNode(ctx, node) + + versionResp, err := talosClient.Version(nodeCtx) + if err != nil { + return fmt.Errorf("error getting Talos version on node %q: %w", node, err) + } + + talosVersion, err := compatibility.ParseTalosVersion(versionResp.Messages[0].GetVersion()) + if err != nil { + return fmt.Errorf("error parsing Talos version on node %q: %w", node, err) + } + + if err = k8sVersionParsed.SupportedWith(talosVersion); err != nil { + return fmt.Errorf("compatibility check failed on node %q: %w", node, err) + } + + logger("> %q: Talos version %s is compatible with Kubernetes version %s", node, talosVersion, k8sVersion) + + return nil + }) + } + + return eg.Wait() +} diff --git a/pkg/cluster/kubernetes/talos_managed.go b/pkg/cluster/kubernetes/talos_managed.go index 3171327c1..419ff95d3 100644 --- 
a/pkg/cluster/kubernetes/talos_managed.go +++ b/pkg/cluster/kubernetes/talos_managed.go @@ -122,6 +122,10 @@ func Upgrade(ctx context.Context, cluster UpgradeProvider, options UpgradeOption return err } + if err = VerifyVersionCompatibility(ctx, talosClient, slices.Concat(options.controlPlaneNodes, options.workerNodes), options.Path.ToVersion(), options.Log); err != nil { + return err + } + upgradeChecks, err := upgrade.NewChecks(options.Path, talosClient.COSI, k8sConfig, options.controlPlaneNodes, options.workerNodes, options.Log) if err != nil { return err diff --git a/pkg/machinery/config/types/v1alpha1/v1alpha1_validation.go b/pkg/machinery/config/types/v1alpha1/v1alpha1_validation.go index 2d109a52f..da8d61c07 100644 --- a/pkg/machinery/config/types/v1alpha1/v1alpha1_validation.go +++ b/pkg/machinery/config/types/v1alpha1/v1alpha1_validation.go @@ -24,6 +24,7 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" sideronet "github.com/siderolabs/net" + "github.com/siderolabs/talos/pkg/machinery/compatibility" "github.com/siderolabs/talos/pkg/machinery/config/config" "github.com/siderolabs/talos/pkg/machinery/config/machine" "github.com/siderolabs/talos/pkg/machinery/config/types/block/blockhelpers" @@ -33,6 +34,7 @@ import ( "github.com/siderolabs/talos/pkg/machinery/labels" "github.com/siderolabs/talos/pkg/machinery/nethelpers" "github.com/siderolabs/talos/pkg/machinery/role" + "github.com/siderolabs/talos/pkg/machinery/version" ) var ( @@ -985,7 +987,70 @@ func (c *Config) RuntimeValidate(ctx context.Context, st state.State, mode valid if len(c.MachineConfig.Install().Extensions()) > 0 { warnings = append(warnings, ".machine.install.extensions is deprecated, please see https://www.talos.dev/latest/talos-guides/install/boot-assets/") } + + if err := ValidateKubernetesImageTag(c.Machine().Kubelet().Image()); err != nil { + result = multierror.Append(result, fmt.Errorf("kubelet image is not valid: %w", err)) + } + } + + if c.ClusterConfig != nil && 
c.MachineConfig != nil { + if c.Machine().Type().IsControlPlane() { + for _, spec := range []struct { + name string + imageRef string + }{ + { + name: "kube-apiserver", + imageRef: c.Cluster().APIServer().Image(), + }, + { + name: "kube-controller-manager", + imageRef: c.Cluster().ControllerManager().Image(), + }, + { + name: "kube-scheduler", + imageRef: c.Cluster().Scheduler().Image(), + }, + } { + if err := ValidateKubernetesImageTag(spec.imageRef); err != nil { + result = multierror.Append(result, fmt.Errorf("%s image is not valid: %w", spec.name, err)) + } + } + } } return warnings, result.ErrorOrNil() } + +// ValidateKubernetesImageTag checks that the Kubernetes version in the image tag is supported with the running Talos version. +func ValidateKubernetesImageTag(imageRef string) error { + // this method is called from RuntimeValidate, so we are inside running Talos, + // so the version of Talos is available, and we can check compatibility + currentTalosVersion, err := compatibility.ParseTalosVersion(version.NewVersion()) + if err != nil { + return fmt.Errorf("failed to parse Talos version: %w", err) + } + + k8sVersion, err := KubernetesVersionFromImageRef(imageRef) + if err != nil { + return fmt.Errorf("failed to parse Kubernetes version from image reference %q: %w", imageRef, err) + } + + return k8sVersion.SupportedWith(currentTalosVersion) +} + +// KubernetesVersionFromImageRef parses the Kubernetes version from the image reference.
+func KubernetesVersionFromImageRef(ref string) (*compatibility.KubernetesVersion, error) { + idx := strings.LastIndex(ref, ":v") + if idx == -1 { + return nil, fmt.Errorf("invalid image reference: %q", ref) + } + + versionPart := ref[idx+2:] + + if shaIndex := strings.Index(versionPart, "@"); shaIndex != -1 { + versionPart = versionPart[:shaIndex] + } + + return compatibility.ParseKubernetesVersion(versionPart) +} diff --git a/pkg/machinery/config/types/v1alpha1/v1alpha1_validation_test.go b/pkg/machinery/config/types/v1alpha1/v1alpha1_validation_test.go index 6af0fd930..267ca9be9 100644 --- a/pkg/machinery/config/types/v1alpha1/v1alpha1_validation_test.go +++ b/pkg/machinery/config/types/v1alpha1/v1alpha1_validation_test.go @@ -10,14 +10,18 @@ import ( "strings" "testing" + "github.com/cosi-project/runtime/pkg/state" + "github.com/cosi-project/runtime/pkg/state/impl/inmem" "github.com/siderolabs/crypto/x509" "github.com/siderolabs/go-pointer" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/siderolabs/talos/pkg/machinery/compatibility" "github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1" "github.com/siderolabs/talos/pkg/machinery/config/validation" "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/version" ) type runtimeMode struct { @@ -2069,3 +2073,158 @@ func TestValidateCNI(t *testing.T) { }) } } + +func TestKubernetesVersionFromImageRef(t *testing.T) { + t.Parallel() + + for _, test := range []struct { + imageRef string + + expectedVersion string + }{ + { + imageRef: "ghcr.io/siderolabs/kubelet:v1.32.2", + expectedVersion: "1.32.2", + }, + { + imageRef: "ghcr.io/siderolabs/kubelet:v1.32.2@sha256:123456", + expectedVersion: "1.32.2", + }, + } { + t.Run(test.imageRef, func(t *testing.T) { + t.Parallel() + + version, err := v1alpha1.KubernetesVersionFromImageRef(test.imageRef) + require.NoError(t, err) + + assert.Equal(t, test.expectedVersion, version.String()) 
+ }) + } +} + +func TestRuntimeValidate(t *testing.T) { + t.Parallel() + + endpointURL, err := url.Parse("https://localhost:6443/") + require.NoError(t, err) + + for _, test := range []struct { + name string + config *v1alpha1.Config + requiresInstall bool + strict bool + expectedWarnings []string + expectedError string + }{ + { + name: "valid", + config: &v1alpha1.Config{ + ClusterConfig: &v1alpha1.ClusterConfig{ + ControlPlane: &v1alpha1.ControlPlaneConfig{ + Endpoint: &v1alpha1.Endpoint{ + URL: endpointURL, + }, + }, + }, + MachineConfig: &v1alpha1.MachineConfig{ + MachineType: "controlplane", + }, + }, + }, + { + name: "old kubelet version", + config: &v1alpha1.Config{ + ClusterConfig: &v1alpha1.ClusterConfig{ + ControlPlane: &v1alpha1.ControlPlaneConfig{ + Endpoint: &v1alpha1.Endpoint{ + URL: endpointURL, + }, + }, + }, + MachineConfig: &v1alpha1.MachineConfig{ + MachineType: "worker", + MachineKubelet: &v1alpha1.KubeletConfig{ + KubeletImage: constants.KubeletImage + ":v1.24.0", + }, + }, + }, + expectedError: "1 error occurred:\n\t* kubelet image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n", + }, + { + name: "old api-server version", + config: &v1alpha1.Config{ + ClusterConfig: &v1alpha1.ClusterConfig{ + ControlPlane: &v1alpha1.ControlPlaneConfig{ + Endpoint: &v1alpha1.Endpoint{ + URL: endpointURL, + }, + }, + APIServerConfig: &v1alpha1.APIServerConfig{ + ContainerImage: constants.KubernetesAPIServerImage + ":v1.24.0", + }, + }, + MachineConfig: &v1alpha1.MachineConfig{ + MachineType: "controlplane", + MachineKubelet: &v1alpha1.KubeletConfig{ + KubeletImage: constants.KubeletImage + ":v" + constants.DefaultKubernetesVersion, + }, + }, + }, + expectedError: "1 error occurred:\n\t* kube-apiserver image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n", + }, + { + name: "old controller-manager and scheduler version", + config: &v1alpha1.Config{ + ClusterConfig: 
&v1alpha1.ClusterConfig{ + ControlPlane: &v1alpha1.ControlPlaneConfig{ + Endpoint: &v1alpha1.Endpoint{ + URL: endpointURL, + }, + }, + APIServerConfig: &v1alpha1.APIServerConfig{ + ContainerImage: constants.KubernetesAPIServerImage + ":v" + constants.DefaultKubernetesVersion, + }, + ControllerManagerConfig: &v1alpha1.ControllerManagerConfig{ + ContainerImage: constants.KubernetesControllerManagerImage + ":v1.24.0", + }, + SchedulerConfig: &v1alpha1.SchedulerConfig{ + ContainerImage: constants.KubernetesSchedulerImage + ":v1.24.0", + }, + }, + MachineConfig: &v1alpha1.MachineConfig{ + MachineType: "controlplane", + MachineKubelet: &v1alpha1.KubeletConfig{ + KubeletImage: constants.KubeletImage + ":v" + constants.DefaultKubernetesVersion, + }, + }, + }, + expectedError: "2 errors occurred:\n\t* kube-controller-manager image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\t* kube-scheduler image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n", //nolint:lll + }, + } { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + var opts []validation.Option + if test.strict { + opts = append(opts, validation.WithStrict()) + } + + st := state.WrapCore(inmem.NewState("")) + + warnings, errors := test.config.RuntimeValidate(t.Context(), st, runtimeMode{test.requiresInstall}, opts...) + + assert.Equal(t, test.expectedWarnings, warnings) + + currentTalosVersion, err := compatibility.ParseTalosVersion(version.NewVersion()) + require.NoError(t, err) + + if test.expectedError == "" { + assert.NoError(t, errors) + } else { + test.expectedError = strings.ReplaceAll(test.expectedError, "VERSION", currentTalosVersion.String()) + + assert.EqualError(t, errors, test.expectedError) + } + }) + } +}