feat: enforce Kubernetes version compatibility

Fixes #11198

We should enforce this in the following places:

* before starting `upgrade-k8s`, check that all Talos machines would end
  up with a valid version
* validate in the Talos machine configuration; this covers upgrades,
  new installs, and any manual machine configuration edits

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2025-06-12 20:47:40 +04:00
parent 6c7f8201a9
commit 4da2dd537d
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
8 changed files with 312 additions and 204 deletions

View File

@ -9,10 +9,7 @@ import (
"fmt"
"log"
"os"
"strings"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/metadata"
@ -20,7 +17,6 @@ import (
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/compatibility"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/k8s"
"github.com/siderolabs/talos/pkg/machinery/role"
"github.com/siderolabs/talos/pkg/machinery/version"
)
@ -77,7 +73,6 @@ func (checks *PreflightChecks) Run(ctx context.Context) error {
for _, check := range []func(context.Context) error{
checks.talosVersion,
checks.kubernetesVersion,
} {
if err := check(ctx); err != nil {
return fmt.Errorf("pre-flight checks failed: %w", err)
@ -112,162 +107,6 @@ func (checks *PreflightChecks) talosVersion(ctx context.Context) error {
return checks.installerTalosVersion.UpgradeableFrom(checks.hostTalosVersion)
}
// k8sVersions holds the Kubernetes versions of the components found on the host.
//
// A nil field means that no version was recorded for the component; such
// components are skipped by the compatibility check and by String.
type k8sVersions struct {
	kubelet, apiServer, scheduler, controllerManager *compatibility.KubernetesVersion
}
// gatherVersions reads the kubelet, API server, scheduler and controller manager
// resources from the COSI state of the given client and stores the Kubernetes
// version parsed from the image reference of each of them.
//
// A lookup which fails with a "not found" error (and yields no resource) is
// skipped; any other lookup error, or an image reference which can't be parsed,
// is returned to the caller.
//
//nolint:gocyclo
func (versions *k8sVersions) gatherVersions(ctx context.Context, client *client.Client) error {
	kubelet, err := safe.StateGet[*k8s.KubeletSpec](ctx, client.COSI, k8s.NewKubeletSpec(k8s.NamespaceName, k8s.KubeletID).Metadata())

	switch {
	case err != nil && !state.IsNotFoundError(err):
		return fmt.Errorf("error getting kubelet spec: %w", err)
	case kubelet != nil:
		if versions.kubelet, err = KubernetesVersionFromImageRef(kubelet.TypedSpec().Image); err != nil {
			return fmt.Errorf("error parsing kubelet version: %w", err)
		}
	}

	apiServer, err := safe.StateGet[*k8s.APIServerConfig](ctx, client.COSI, k8s.NewAPIServerConfig().Metadata())

	switch {
	case err != nil && !state.IsNotFoundError(err):
		return fmt.Errorf("error getting API server spec: %w", err)
	case apiServer != nil:
		if versions.apiServer, err = KubernetesVersionFromImageRef(apiServer.TypedSpec().Image); err != nil {
			return fmt.Errorf("error parsing API server version: %w", err)
		}
	}

	scheduler, err := safe.StateGet[*k8s.SchedulerConfig](ctx, client.COSI, k8s.NewSchedulerConfig().Metadata())

	switch {
	case err != nil && !state.IsNotFoundError(err):
		return fmt.Errorf("error getting scheduler spec: %w", err)
	case scheduler != nil:
		if versions.scheduler, err = KubernetesVersionFromImageRef(scheduler.TypedSpec().Image); err != nil {
			return fmt.Errorf("error parsing scheduler version: %w", err)
		}
	}

	controllerManager, err := safe.StateGet[*k8s.ControllerManagerConfig](ctx, client.COSI, k8s.NewControllerManagerConfig().Metadata())

	switch {
	case err != nil && !state.IsNotFoundError(err):
		return fmt.Errorf("error getting controller manager spec: %w", err)
	case controllerManager != nil:
		if versions.controllerManager, err = KubernetesVersionFromImageRef(controllerManager.TypedSpec().Image); err != nil {
			return fmt.Errorf("error parsing controller manager version: %w", err)
		}
	}

	return nil
}
// checkCompatibility verifies every recorded component version against the target Talos version.
//
// Components are checked in a fixed order (kubelet, kube-apiserver, kube-scheduler,
// kube-controller-manager) and the first incompatibility is returned, wrapped with the
// component name. Components without a recorded version are ignored.
func (versions *k8sVersions) checkCompatibility(target *compatibility.TalosVersion) error {
	type namedVersion struct {
		name    string
		version *compatibility.KubernetesVersion
	}

	known := []namedVersion{
		{"kubelet", versions.kubelet},
		{"kube-apiserver", versions.apiServer},
		{"kube-scheduler", versions.scheduler},
		{"kube-controller-manager", versions.controllerManager},
	}

	for _, entry := range known {
		if entry.version != nil {
			if err := entry.version.SupportedWith(target); err != nil {
				return fmt.Errorf("component %s version issue: %w", entry.name, err)
			}
		}
	}

	return nil
}
// String renders the recorded component versions as a comma-separated list of
// "<component>: <version>" pairs, omitting components without a recorded version.
func (versions *k8sVersions) String() string {
	var parts []string

	// add appends a single "<name>: <version>" entry, skipping unset versions
	add := func(name string, v *compatibility.KubernetesVersion) {
		if v != nil {
			parts = append(parts, fmt.Sprintf("%s: %s", name, v))
		}
	}

	add("kubelet", versions.kubelet)
	add("kube-apiserver", versions.apiServer)
	add("kube-scheduler", versions.scheduler)
	add("kube-controller-manager", versions.controllerManager)

	return strings.Join(parts, ", ")
}
// kubernetesVersion is a pre-flight check: it gathers the Kubernetes component versions
// via the checks' client, logs them, and verifies them against the installer Talos version.
func (checks *PreflightChecks) kubernetesVersion(ctx context.Context) error {
	versions := &k8sVersions{}

	err := versions.gatherVersions(ctx, checks.client)
	if err != nil {
		return err
	}

	log.Printf("host Kubernetes versions: %s", versions)

	return versions.checkCompatibility(checks.installerTalosVersion)
}
// KubernetesVersionFromImageRef parses the Kubernetes version from the image reference.
//
// The version is taken from the text following the last ":v" in ref, up to an optional
// "@" (e.g. a digest suffix). A reference without ":v" is rejected as invalid.
func KubernetesVersionFromImageRef(ref string) (*compatibility.KubernetesVersion, error) {
	tagStart := strings.LastIndex(ref, ":v")
	if tagStart < 0 {
		return nil, fmt.Errorf("invalid image reference: %q", ref)
	}

	// drop whatever follows the first "@" after the tag, if anything
	tag, _, _ := strings.Cut(ref[tagStart+len(":v"):], "@")

	return compatibility.ParseKubernetesVersion(tag)
}
func unpack[T any](s []T) T {
if len(s) != 1 {
panic("unpack: slice length is not 1")

View File

@ -1,42 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package install_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/siderolabs/talos/cmd/installer/pkg/install"
)
// TestKubernetesVersionFromImageRef checks that the version is extracted from image
// references both with and without a digest suffix.
func TestKubernetesVersionFromImageRef(t *testing.T) {
	t.Parallel()

	type testCase struct {
		imageRef        string
		expectedVersion string
	}

	cases := []testCase{
		{
			imageRef:        "ghcr.io/siderolabs/kubelet:v1.32.2",
			expectedVersion: "1.32.2",
		},
		{
			imageRef:        "ghcr.io/siderolabs/kubelet:v1.32.2@sha256:123456",
			expectedVersion: "1.32.2",
		},
	}

	for _, tc := range cases {
		t.Run(tc.imageRef, func(t *testing.T) {
			t.Parallel()

			parsed, err := install.KubernetesVersionFromImageRef(tc.imageRef)
			require.NoError(t, err)

			assert.Equal(t, tc.expectedVersion, parsed.String())
		})
	}
}

View File

@ -59,6 +59,15 @@ Talos VMWare platform now supports `arm64` architecture in addition to `amd64`.
description = """\
Talos on Azure now defaults to MTU of 1400 bytes for the `eth0` interface to avoid packet fragmentation issues.
The default MTU can be overridden with machine configuration.
"""
[notes.k8s_version]
title = "Kubernetes Version Validation"
description = """\
Talos now validates the Kubernetes version in the images submitted in the machine configuration.
Previously this check was performed only on upgrade, but now it is consistently applied to upgrade, initial provisioning, and machine configuration updates.
This implies that all image references should contain the tag, even if the image is pinned by digest.
"""
[make_deps]

View File

@ -18,7 +18,9 @@ import (
"github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers"
"github.com/siderolabs/talos/pkg/images"
"github.com/siderolabs/talos/pkg/machinery/compatibility"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/version"
)
// K8sCompatibilitySuite ...
@ -58,6 +60,22 @@ func (suite *K8sCompatibilitySuite) SetupSuite() {
Patch: 0,
}
// while Talos is in alpha stage, DefaultKubernetesVersion might be 1 minor behind the latest alpha Kubernetes version,
// so we need to ensure that minVersion fits into compatibility range
minVersionAdjusted := false
currentTalosVersion, err := compatibility.ParseTalosVersion(version.NewVersion())
suite.Require().NoError(err)
minKubernetesVersion, err := compatibility.ParseKubernetesVersion(minVersion.String())
suite.Require().NoError(err)
if minKubernetesVersion.SupportedWith(currentTalosVersion) != nil {
// bump up minVersion to the next minor version
minVersion.Minor++
minVersionAdjusted = true
}
type versionInfo struct {
Major uint64
Minor uint64
@ -111,7 +129,12 @@ func (suite *K8sCompatibilitySuite) SetupSuite() {
suite.T().Logf("using following upgrade sequence: %v", suite.versionsSequence)
suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions)
if minVersionAdjusted {
suite.T().Logf("min Kubernetes version was adjusted to %s to fit Talos compatibility range", minVersion.String())
suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions-1)
} else {
suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions)
}
suite.BaseSuite.SetupSuite()
}

View File

@ -0,0 +1,51 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package kubernetes
import (
"context"
"fmt"
"golang.org/x/sync/errgroup"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/compatibility"
)
// VerifyVersionCompatibility checks if the given Kubernetes version is compatible with the
// Talos version running on each of the given nodes.
//
// The Talos version is requested from every node concurrently using talosClient. For each
// compatible node a line is reported via logger. The error returned is the first failure
// observed: an unparsable Kubernetes version, a failed or empty version response, an
// unparsable Talos version, or a failed compatibility check.
func VerifyVersionCompatibility(ctx context.Context, talosClient *client.Client, nodes []string, k8sVersion string, logger func(string, ...any)) error {
	// parse before setting up the group: a malformed version fails without contacting any node
	k8sVersionParsed, err := compatibility.ParseKubernetesVersion(k8sVersion)
	if err != nil {
		return fmt.Errorf("error parsing Kubernetes version %q: %w", k8sVersion, err)
	}

	eg, ctx := errgroup.WithContext(ctx)

	for _, node := range nodes {
		eg.Go(func() error {
			nodeCtx := client.WithNode(ctx, node)

			versionResp, err := talosClient.Version(nodeCtx)
			if err != nil {
				return fmt.Errorf("error getting Talos version on node %q: %w", node, err)
			}

			// guard the index below: a response without messages would otherwise panic the goroutine
			if len(versionResp.Messages) == 0 {
				return fmt.Errorf("error getting Talos version on node %q: empty response", node)
			}

			talosVersion, err := compatibility.ParseTalosVersion(versionResp.Messages[0].GetVersion())
			if err != nil {
				return fmt.Errorf("error parsing Talos version on node %q: %w", node, err)
			}

			if err = k8sVersionParsed.SupportedWith(talosVersion); err != nil {
				return fmt.Errorf("compatibility check failed on node %q: %w", node, err)
			}

			logger("> %q: Talos version %s is compatible with Kubernetes version %s", node, talosVersion, k8sVersion)

			return nil
		})
	}

	return eg.Wait()
}

View File

@ -122,6 +122,10 @@ func Upgrade(ctx context.Context, cluster UpgradeProvider, options UpgradeOption
return err
}
if err = VerifyVersionCompatibility(ctx, talosClient, slices.Concat(options.controlPlaneNodes, options.workerNodes), options.Path.ToVersion(), options.Log); err != nil {
return err
}
upgradeChecks, err := upgrade.NewChecks(options.Path, talosClient.COSI, k8sConfig, options.controlPlaneNodes, options.workerNodes, options.Log)
if err != nil {
return err

View File

@ -24,6 +24,7 @@ import (
"github.com/opencontainers/runtime-spec/specs-go"
sideronet "github.com/siderolabs/net"
"github.com/siderolabs/talos/pkg/machinery/compatibility"
"github.com/siderolabs/talos/pkg/machinery/config/config"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/config/types/block/blockhelpers"
@ -33,6 +34,7 @@ import (
"github.com/siderolabs/talos/pkg/machinery/labels"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/machinery/role"
"github.com/siderolabs/talos/pkg/machinery/version"
)
var (
@ -985,7 +987,70 @@ func (c *Config) RuntimeValidate(ctx context.Context, st state.State, mode valid
if len(c.MachineConfig.Install().Extensions()) > 0 {
warnings = append(warnings, ".machine.install.extensions is deprecated, please see https://www.talos.dev/latest/talos-guides/install/boot-assets/")
}
if err := ValidateKubernetesImageTag(c.Machine().Kubelet().Image()); err != nil {
result = multierror.Append(result, fmt.Errorf("kubelet image is not valid: %w", err))
}
}
if c.ClusterConfig != nil && c.MachineConfig != nil {
if c.Machine().Type().IsControlPlane() {
for _, spec := range []struct {
name string
imageRef string
}{
{
name: "kube-apiserver",
imageRef: c.Cluster().APIServer().Image(),
},
{
name: "kube-controller-manager",
imageRef: c.Cluster().ControllerManager().Image(),
},
{
name: "kube-scheduler",
imageRef: c.Cluster().Scheduler().Image(),
},
} {
if err := ValidateKubernetesImageTag(spec.imageRef); err != nil {
result = multierror.Append(result, fmt.Errorf("%s image is not valid: %w", spec.name, err))
}
}
}
}
return warnings, result.ErrorOrNil()
}
// ValidateKubernetesImageTag validates that the Kubernetes version encoded in the tag of
// imageRef is supported with the Talos version reported by version.NewVersion.
//
// An error is returned if the Talos version can't be parsed, if no Kubernetes version can
// be parsed out of imageRef, or if the compatibility check rejects the combination.
func ValidateKubernetesImageTag(imageRef string) error {
	// the caller is RuntimeValidate, i.e. this code runs inside Talos itself,
	// so the Talos version is known and compatibility can be checked against it
	talosVersion, talosErr := compatibility.ParseTalosVersion(version.NewVersion())
	if talosErr != nil {
		return fmt.Errorf("failed to parse Talos version: %w", talosErr)
	}

	imageVersion, imageErr := KubernetesVersionFromImageRef(imageRef)
	if imageErr != nil {
		return fmt.Errorf("failed to parse Kubernetes version from image reference %q: %w", imageRef, imageErr)
	}

	return imageVersion.SupportedWith(talosVersion)
}
// KubernetesVersionFromImageRef parses the Kubernetes version from the image reference.
//
// The version is taken from the text following the last ":v" in ref, up to an optional
// "@" (e.g. a digest suffix). A reference without ":v" is rejected as invalid.
func KubernetesVersionFromImageRef(ref string) (*compatibility.KubernetesVersion, error) {
	tagStart := strings.LastIndex(ref, ":v")
	if tagStart < 0 {
		return nil, fmt.Errorf("invalid image reference: %q", ref)
	}

	// drop whatever follows the first "@" after the tag, if anything
	tag, _, _ := strings.Cut(ref[tagStart+len(":v"):], "@")

	return compatibility.ParseKubernetesVersion(tag)
}

View File

@ -10,14 +10,18 @@ import (
"strings"
"testing"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
"github.com/siderolabs/crypto/x509"
"github.com/siderolabs/go-pointer"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/siderolabs/talos/pkg/machinery/compatibility"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
"github.com/siderolabs/talos/pkg/machinery/config/validation"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/version"
)
type runtimeMode struct {
@ -2069,3 +2073,158 @@ func TestValidateCNI(t *testing.T) {
})
}
}
// TestKubernetesVersionFromImageRef checks that the version is extracted from image
// references both with and without a digest suffix.
func TestKubernetesVersionFromImageRef(t *testing.T) {
	t.Parallel()

	type testCase struct {
		imageRef        string
		expectedVersion string
	}

	cases := []testCase{
		{
			imageRef:        "ghcr.io/siderolabs/kubelet:v1.32.2",
			expectedVersion: "1.32.2",
		},
		{
			imageRef:        "ghcr.io/siderolabs/kubelet:v1.32.2@sha256:123456",
			expectedVersion: "1.32.2",
		},
	}

	for _, tc := range cases {
		t.Run(tc.imageRef, func(t *testing.T) {
			t.Parallel()

			parsed, err := v1alpha1.KubernetesVersionFromImageRef(tc.imageRef)
			require.NoError(t, err)

			assert.Equal(t, tc.expectedVersion, parsed.String())
		})
	}
}
// TestRuntimeValidate runs Config.RuntimeValidate against an empty in-memory state and
// checks the returned warnings and error for each table entry.
//
// The "VERSION" placeholder in expectedError is replaced with the current Talos version
// before the comparison, as the error text embeds the version the test is built with.
func TestRuntimeValidate(t *testing.T) {
	t.Parallel()

	endpointURL, err := url.Parse("https://localhost:6443/")
	require.NoError(t, err)

	// the Talos version is the same for every subtest, so it is resolved once up front
	currentTalosVersion, err := compatibility.ParseTalosVersion(version.NewVersion())
	require.NoError(t, err)

	for _, test := range []struct {
		name             string
		config           *v1alpha1.Config
		requiresInstall  bool
		strict           bool
		expectedWarnings []string
		expectedError    string
	}{
		{
			name: "valid",
			config: &v1alpha1.Config{
				ClusterConfig: &v1alpha1.ClusterConfig{
					ControlPlane: &v1alpha1.ControlPlaneConfig{
						Endpoint: &v1alpha1.Endpoint{
							URL: endpointURL,
						},
					},
				},
				MachineConfig: &v1alpha1.MachineConfig{
					MachineType: "controlplane",
				},
			},
		},
		{
			name: "old kubelet version",
			config: &v1alpha1.Config{
				ClusterConfig: &v1alpha1.ClusterConfig{
					ControlPlane: &v1alpha1.ControlPlaneConfig{
						Endpoint: &v1alpha1.Endpoint{
							URL: endpointURL,
						},
					},
				},
				MachineConfig: &v1alpha1.MachineConfig{
					MachineType: "worker",
					MachineKubelet: &v1alpha1.KubeletConfig{
						KubeletImage: constants.KubeletImage + ":v1.24.0",
					},
				},
			},
			expectedError: "1 error occurred:\n\t* kubelet image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n",
		},
		{
			name: "old api-server version",
			config: &v1alpha1.Config{
				ClusterConfig: &v1alpha1.ClusterConfig{
					ControlPlane: &v1alpha1.ControlPlaneConfig{
						Endpoint: &v1alpha1.Endpoint{
							URL: endpointURL,
						},
					},
					APIServerConfig: &v1alpha1.APIServerConfig{
						ContainerImage: constants.KubernetesAPIServerImage + ":v1.24.0",
					},
				},
				MachineConfig: &v1alpha1.MachineConfig{
					MachineType: "controlplane",
					MachineKubelet: &v1alpha1.KubeletConfig{
						KubeletImage: constants.KubeletImage + ":v" + constants.DefaultKubernetesVersion,
					},
				},
			},
			expectedError: "1 error occurred:\n\t* kube-apiserver image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n",
		},
		{
			name: "old controller-manager and scheduler version",
			config: &v1alpha1.Config{
				ClusterConfig: &v1alpha1.ClusterConfig{
					ControlPlane: &v1alpha1.ControlPlaneConfig{
						Endpoint: &v1alpha1.Endpoint{
							URL: endpointURL,
						},
					},
					APIServerConfig: &v1alpha1.APIServerConfig{
						ContainerImage: constants.KubernetesAPIServerImage + ":v" + constants.DefaultKubernetesVersion,
					},
					ControllerManagerConfig: &v1alpha1.ControllerManagerConfig{
						ContainerImage: constants.KubernetesControllerManagerImage + ":v1.24.0",
					},
					SchedulerConfig: &v1alpha1.SchedulerConfig{
						ContainerImage: constants.KubernetesSchedulerImage + ":v1.24.0",
					},
				},
				MachineConfig: &v1alpha1.MachineConfig{
					MachineType: "controlplane",
					MachineKubelet: &v1alpha1.KubeletConfig{
						KubeletImage: constants.KubeletImage + ":v" + constants.DefaultKubernetesVersion,
					},
				},
			},
			expectedError: "2 errors occurred:\n\t* kube-controller-manager image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\t* kube-scheduler image is not valid: version of Kubernetes 1.24.0 is too old to be used with Talos VERSION\n\n", //nolint:lll
		},
	} {
		t.Run(test.name, func(t *testing.T) {
			t.Parallel()

			var opts []validation.Option

			if test.strict {
				opts = append(opts, validation.WithStrict())
			}

			st := state.WrapCore(inmem.NewState(""))

			warnings, validateErr := test.config.RuntimeValidate(t.Context(), st, runtimeMode{test.requiresInstall}, opts...)

			assert.Equal(t, test.expectedWarnings, warnings)

			if test.expectedError == "" {
				assert.NoError(t, validateErr)

				return
			}

			// build the expected message in a local, leaving the table entry unmodified
			expectedError := strings.ReplaceAll(test.expectedError, "VERSION", currentTalosVersion.String())

			assert.EqualError(t, validateErr, expectedError)
		})
	}
}