mirror of
				https://github.com/siderolabs/talos.git
				synced 2025-11-04 02:11:12 +01:00 
			
		
		
		
	This fixes an issue with a single controlplane cluster. Properly present all accepted CAs to the apiserver, and in the test let the cluster fully recover between the two CA rotations. Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
		
			
				
	
	
		
			218 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			218 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// This Source Code Form is subject to the terms of the Mozilla Public
 | 
						|
// License, v. 2.0. If a copy of the MPL was not distributed with this
 | 
						|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
						|
 | 
						|
//go:build integration_api
 | 
						|
 | 
						|
package api
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"testing"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/cosi-project/runtime/pkg/safe"
 | 
						|
 | 
						|
	"github.com/siderolabs/talos/internal/integration/base"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/api/common"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/client"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/config"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/config/encoder"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/config/generate/secrets"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/config/machine"
 | 
						|
	"github.com/siderolabs/talos/pkg/machinery/constants"
 | 
						|
	secretsres "github.com/siderolabs/talos/pkg/machinery/resources/secrets"
 | 
						|
	"github.com/siderolabs/talos/pkg/provision/access"
 | 
						|
	"github.com/siderolabs/talos/pkg/rotate/pki/kubernetes"
 | 
						|
	"github.com/siderolabs/talos/pkg/rotate/pki/talos"
 | 
						|
)
 | 
						|
 | 
						|
// RotateCASuite verifies rotation of Talos and Kubernetes CAs.
 | 
						|
type RotateCASuite struct {
 | 
						|
	base.APISuite
 | 
						|
 | 
						|
	ctx       context.Context //nolint:containedctx
 | 
						|
	ctxCancel context.CancelFunc
 | 
						|
}
 | 
						|
 | 
						|
// SuiteName ...
 | 
						|
func (suite *RotateCASuite) SuiteName() string {
 | 
						|
	return "api.RotateCASuite"
 | 
						|
}
 | 
						|
 | 
						|
// SetupTest ...
 | 
						|
func (suite *RotateCASuite) SetupTest() {
 | 
						|
	// make sure API calls have timeout
 | 
						|
	suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 5*time.Minute)
 | 
						|
}
 | 
						|
 | 
						|
// TearDownTest ...
 | 
						|
func (suite *RotateCASuite) TearDownTest() {
 | 
						|
	if suite.ctxCancel != nil {
 | 
						|
		suite.ctxCancel()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// TestTalos updates Talos CA in the cluster.
 | 
						|
func (suite *RotateCASuite) TestTalos() {
 | 
						|
	if suite.Cluster == nil {
 | 
						|
		suite.T().Skip("cluster information is not available")
 | 
						|
	}
 | 
						|
 | 
						|
	suite.T().Logf("capturing current Talos CA")
 | 
						|
 | 
						|
	nodeInternalIP := suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)
 | 
						|
 | 
						|
	// save osRoot
 | 
						|
	osRoot, err := safe.StateGetByID[*secretsres.OSRoot](client.WithNode(suite.ctx, nodeInternalIP), suite.Client.COSI, secretsres.OSRootID)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	suite.T().Logf("rotating current CA -> new CA")
 | 
						|
 | 
						|
	newBundle, err := secrets.NewBundle(secrets.NewFixedClock(time.Now()), config.TalosVersionCurrent)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	options := talos.Options{
 | 
						|
		CurrentClient: suite.Client,
 | 
						|
		ClusterInfo:   access.NewAdapter(suite.Cluster),
 | 
						|
 | 
						|
		ContextName: suite.Talosconfig.Context,
 | 
						|
		Endpoints:   suite.Client.GetEndpoints(),
 | 
						|
 | 
						|
		NewTalosCA: newBundle.Certs.OS,
 | 
						|
 | 
						|
		EncoderOption: encoder.WithComments(encoder.CommentsAll),
 | 
						|
 | 
						|
		Printf: suite.T().Logf,
 | 
						|
	}
 | 
						|
 | 
						|
	newTalosconfig, err := talos.Rotate(suite.ctx, options)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	newClient, err := client.New(suite.ctx, client.WithConfig(newTalosconfig))
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	if !testing.Short() {
 | 
						|
		suite.restartAPIServices(newClient)
 | 
						|
	}
 | 
						|
 | 
						|
	suite.T().Logf("rotating back new CA -> old CA")
 | 
						|
 | 
						|
	options = talos.Options{
 | 
						|
		CurrentClient: newClient,
 | 
						|
		ClusterInfo:   access.NewAdapter(suite.Cluster),
 | 
						|
 | 
						|
		ContextName: suite.Talosconfig.Context,
 | 
						|
		Endpoints:   suite.Client.GetEndpoints(),
 | 
						|
 | 
						|
		NewTalosCA: osRoot.TypedSpec().IssuingCA,
 | 
						|
 | 
						|
		EncoderOption: encoder.WithComments(encoder.CommentsAll),
 | 
						|
 | 
						|
		Printf: suite.T().Logf,
 | 
						|
	}
 | 
						|
 | 
						|
	_, err = talos.Rotate(suite.ctx, options)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	suite.AssertClusterHealthy(suite.ctx)
 | 
						|
 | 
						|
	suite.ClearConnectionRefused(suite.ctx, suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeWorker)...)
 | 
						|
}
 | 
						|
 | 
						|
// TestKubernetes updates Kubernetes CA in the cluster.
 | 
						|
func (suite *RotateCASuite) TestKubernetes() {
 | 
						|
	if suite.Cluster == nil {
 | 
						|
		suite.T().Skip("cluster information is not available")
 | 
						|
	}
 | 
						|
 | 
						|
	if testing.Short() {
 | 
						|
		suite.T().Skip("skipping in short mode")
 | 
						|
	}
 | 
						|
 | 
						|
	suite.T().Logf("capturing current Kubernetes CA")
 | 
						|
 | 
						|
	nodeInternalIP := suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)
 | 
						|
 | 
						|
	// save k8sRoot
 | 
						|
	k8sRoot, err := safe.StateGetByID[*secretsres.KubernetesRoot](client.WithNode(suite.ctx, nodeInternalIP), suite.Client.COSI, secretsres.KubernetesRootID)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	suite.T().Logf("rotating current CA -> new CA")
 | 
						|
 | 
						|
	newBundle, err := secrets.NewBundle(secrets.NewFixedClock(time.Now()), config.TalosVersionCurrent)
 | 
						|
	suite.Require().NoError(err)
 | 
						|
 | 
						|
	options := kubernetes.Options{
 | 
						|
		TalosClient: suite.Client,
 | 
						|
		ClusterInfo: access.NewAdapter(suite.Cluster),
 | 
						|
 | 
						|
		NewKubernetesCA: newBundle.Certs.K8s,
 | 
						|
 | 
						|
		EncoderOption: encoder.WithComments(encoder.CommentsAll),
 | 
						|
 | 
						|
		Printf: suite.T().Logf,
 | 
						|
	}
 | 
						|
 | 
						|
	suite.Require().NoError(kubernetes.Rotate(suite.ctx, options))
 | 
						|
 | 
						|
	suite.AssertClusterHealthy(suite.ctx)
 | 
						|
 | 
						|
	suite.T().Logf("rotating back new CA -> old CA")
 | 
						|
 | 
						|
	options = kubernetes.Options{
 | 
						|
		TalosClient: suite.Client,
 | 
						|
		ClusterInfo: access.NewAdapter(suite.Cluster),
 | 
						|
 | 
						|
		NewKubernetesCA: k8sRoot.TypedSpec().IssuingCA,
 | 
						|
 | 
						|
		EncoderOption: encoder.WithComments(encoder.CommentsAll),
 | 
						|
 | 
						|
		Printf: suite.T().Logf,
 | 
						|
	}
 | 
						|
 | 
						|
	suite.Require().NoError(kubernetes.Rotate(suite.ctx, options))
 | 
						|
 | 
						|
	suite.AssertClusterHealthy(suite.ctx)
 | 
						|
}
 | 
						|
 | 
						|
func (suite *RotateCASuite) restartAPIServices(c *client.Client) {
 | 
						|
	suite.T().Logf("restarting API services")
 | 
						|
 | 
						|
	var oldClient *client.Client
 | 
						|
	oldClient, suite.Client = suite.Client, c
 | 
						|
 | 
						|
	defer func() {
 | 
						|
		suite.Client = oldClient
 | 
						|
	}()
 | 
						|
 | 
						|
	for _, node := range suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeControlPlane) {
 | 
						|
		suite.T().Logf("restarting API services on %s", node)
 | 
						|
 | 
						|
		err := c.Restart(client.WithNode(suite.ctx, node), constants.SystemContainerdNamespace, common.ContainerDriver_CONTAINERD, "trustd")
 | 
						|
		suite.Require().NoError(err)
 | 
						|
 | 
						|
		suite.ClearConnectionRefused(suite.ctx, node)
 | 
						|
 | 
						|
		err = c.Restart(client.WithNode(suite.ctx, node), constants.SystemContainerdNamespace, common.ContainerDriver_CONTAINERD, "apid")
 | 
						|
		suite.Require().NoError(err)
 | 
						|
 | 
						|
		suite.ClearConnectionRefused(suite.ctx, node)
 | 
						|
	}
 | 
						|
 | 
						|
	for _, node := range suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeWorker) {
 | 
						|
		suite.T().Logf("restarting API services on %s", node)
 | 
						|
 | 
						|
		err := c.Restart(client.WithNode(suite.ctx, node), constants.SystemContainerdNamespace, common.ContainerDriver_CONTAINERD, "apid")
 | 
						|
		suite.Require().NoError(err)
 | 
						|
 | 
						|
		suite.ClearConnectionRefused(suite.ctx, node)
 | 
						|
	}
 | 
						|
 | 
						|
	suite.AssertClusterHealthy(suite.ctx)
 | 
						|
}
 | 
						|
 | 
						|
func init() {
 | 
						|
	allSuites = append(allSuites, new(RotateCASuite))
 | 
						|
}
 |