mirror of
https://github.com/siderolabs/talos.git
synced 2025-08-08 07:37:06 +02:00
This adds an API for recovering the self-hosted control plane. Signed-off-by: Andrew Rynhard <andrew@andrewrynhard.com>
111 lines
2.6 KiB
Go
111 lines
2.6 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
// +build integration_api
|
|
|
|
package api
|
|
|
|
import (
|
|
"context"
|
|
"sort"
|
|
"testing"
|
|
"time"
|
|
|
|
"golang.org/x/sync/errgroup"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
"github.com/talos-systems/talos/api/machine"
|
|
"github.com/talos-systems/talos/internal/integration/base"
|
|
"github.com/talos-systems/talos/pkg/client"
|
|
)
|
|
|
|
type RecoverSuite struct {
|
|
base.K8sSuite
|
|
|
|
ctx context.Context
|
|
ctxCancel context.CancelFunc
|
|
}
|
|
|
|
// SuiteName ...
|
|
func (suite *RecoverSuite) SuiteName() string {
|
|
return "api.RecoverSuite"
|
|
}
|
|
|
|
// SetupTest ...
|
|
func (suite *RecoverSuite) SetupTest() {
|
|
if testing.Short() {
|
|
suite.T().Skip("skipping in short mode")
|
|
}
|
|
|
|
// make sure we abort at some point in time, but give enough room for Recovers
|
|
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Minute)
|
|
}
|
|
|
|
// TearDownTest ...
|
|
func (suite *RecoverSuite) TearDownTest() {
|
|
suite.ctxCancel()
|
|
}
|
|
|
|
// TestRecoverControlPlane removes the control plane components and attempts to recover them with the recover API.
|
|
func (suite *RecoverSuite) TestRecoverControlPlane() {
|
|
if !suite.Capabilities().SupportsRecover {
|
|
suite.T().Skip("cluster doesn't support recovery")
|
|
}
|
|
|
|
if suite.Cluster == nil {
|
|
suite.T().Skip("without full cluster state recover test is not reliable (can't wait for cluster readiness)")
|
|
}
|
|
|
|
pods, err := suite.Clientset.CoreV1().Pods("kube-system").List(suite.ctx, metav1.ListOptions{
|
|
LabelSelector: "k8s-app in (kube-scheduler,kube-controller-manager)",
|
|
})
|
|
|
|
suite.Assert().NoError(err)
|
|
|
|
var eg errgroup.Group
|
|
|
|
for _, pod := range pods.Items {
|
|
pod := pod
|
|
|
|
eg.Go(func() error {
|
|
suite.T().Logf("Deleting %s", pod.GetName())
|
|
|
|
err := suite.Clientset.CoreV1().Pods(pod.GetNamespace()).Delete(suite.ctx, pod.GetName(), metav1.DeleteOptions{})
|
|
|
|
return err
|
|
})
|
|
}
|
|
|
|
suite.Assert().NoError(eg.Wait())
|
|
|
|
nodes := suite.DiscoverNodes()
|
|
suite.Require().NotEmpty(nodes)
|
|
|
|
sort.Strings(nodes)
|
|
|
|
node := nodes[0]
|
|
|
|
suite.T().Log("Recovering control plane")
|
|
|
|
ctx, ctxCancel := context.WithTimeout(suite.ctx, 5*time.Minute)
|
|
defer ctxCancel()
|
|
|
|
nodeCtx := client.WithNodes(ctx, node)
|
|
|
|
in := &machine.RecoverRequest{
|
|
Source: machine.RecoverRequest_APISERVER,
|
|
}
|
|
|
|
_, err = suite.Client.MachineClient.Recover(nodeCtx, in)
|
|
|
|
suite.Assert().NoError(err)
|
|
|
|
// NB: using `ctx` here to have client talking to init node by default
|
|
suite.AssertClusterHealthy(ctx)
|
|
}
|
|
|
|
func init() {
|
|
allSuites = append(allSuites, new(RecoverSuite))
|
|
}
|