mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-18 19:11:12 +02:00
feat: add etcd API
This adds RPCs for basic etcd management tasks. Signed-off-by: Andrew Rynhard <andrew@rynhard.io>
This commit is contained in:
parent
9c969a4be5
commit
4eeef28e90
@ -14,7 +14,8 @@ import "common/common.proto";
|
|||||||
|
|
||||||
// The machine service definition.
|
// The machine service definition.
|
||||||
service MachineService {
|
service MachineService {
|
||||||
rpc ApplyConfiguration(ApplyConfigurationRequest) returns (ApplyConfigurationResponse);
|
rpc ApplyConfiguration(ApplyConfigurationRequest)
|
||||||
|
returns (ApplyConfigurationResponse);
|
||||||
rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse);
|
rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse);
|
||||||
rpc Containers(ContainersRequest) returns (ContainersResponse);
|
rpc Containers(ContainersRequest) returns (ContainersResponse);
|
||||||
rpc Copy(CopyRequest) returns (stream common.Data);
|
rpc Copy(CopyRequest) returns (stream common.Data);
|
||||||
@ -22,6 +23,11 @@ service MachineService {
|
|||||||
rpc DiskStats(google.protobuf.Empty) returns (DiskStatsResponse);
|
rpc DiskStats(google.protobuf.Empty) returns (DiskStatsResponse);
|
||||||
rpc Dmesg(DmesgRequest) returns (stream common.Data);
|
rpc Dmesg(DmesgRequest) returns (stream common.Data);
|
||||||
rpc Events(EventsRequest) returns (stream Event);
|
rpc Events(EventsRequest) returns (stream Event);
|
||||||
|
rpc EtcdMemberList(EtcdMemberListRequest) returns (EtcdMemberListResponse);
|
||||||
|
rpc EtcdLeaveCluster(EtcdLeaveClusterRequest)
|
||||||
|
returns (EtcdLeaveClusterResponse);
|
||||||
|
rpc EtcdForfeitLeadership(EtcdForfeitLeadershipRequest)
|
||||||
|
returns (EtcdForfeitLeadershipResponse);
|
||||||
rpc Hostname(google.protobuf.Empty) returns (HostnameResponse);
|
rpc Hostname(google.protobuf.Empty) returns (HostnameResponse);
|
||||||
rpc Kubeconfig(google.protobuf.Empty) returns (stream common.Data);
|
rpc Kubeconfig(google.protobuf.Empty) returns (stream common.Data);
|
||||||
rpc List(ListRequest) returns (stream FileInfo);
|
rpc List(ListRequest) returns (stream FileInfo);
|
||||||
@ -29,7 +35,8 @@ service MachineService {
|
|||||||
rpc Logs(LogsRequest) returns (stream common.Data);
|
rpc Logs(LogsRequest) returns (stream common.Data);
|
||||||
rpc Memory(google.protobuf.Empty) returns (MemoryResponse);
|
rpc Memory(google.protobuf.Empty) returns (MemoryResponse);
|
||||||
rpc Mounts(google.protobuf.Empty) returns (MountsResponse);
|
rpc Mounts(google.protobuf.Empty) returns (MountsResponse);
|
||||||
rpc NetworkDeviceStats(google.protobuf.Empty) returns (NetworkDeviceStatsResponse);
|
rpc NetworkDeviceStats(google.protobuf.Empty)
|
||||||
|
returns (NetworkDeviceStatsResponse);
|
||||||
rpc Processes(google.protobuf.Empty) returns (ProcessesResponse);
|
rpc Processes(google.protobuf.Empty) returns (ProcessesResponse);
|
||||||
rpc Read(ReadRequest) returns (stream common.Data);
|
rpc Read(ReadRequest) returns (stream common.Data);
|
||||||
rpc Reboot(google.protobuf.Empty) returns (RebootResponse);
|
rpc Reboot(google.protobuf.Empty) returns (RebootResponse);
|
||||||
@ -49,10 +56,9 @@ service MachineService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// rpc applyConfiguration
|
// rpc applyConfiguration
|
||||||
// ApplyConfiguration describes a request to assert a new configuration upon a node.
|
// ApplyConfiguration describes a request to assert a new configuration upon a
|
||||||
message ApplyConfigurationRequest {
|
// node.
|
||||||
bytes data = 1;
|
message ApplyConfigurationRequest { bytes data = 1; }
|
||||||
}
|
|
||||||
|
|
||||||
// ApplyConfigurationResponse describes the response to a configuration request.
|
// ApplyConfigurationResponse describes the response to a configuration request.
|
||||||
message ApplyConfiguration { common.Metadata metadata = 1; }
|
message ApplyConfiguration { common.Metadata metadata = 1; }
|
||||||
@ -499,9 +505,7 @@ message MemInfo {
|
|||||||
|
|
||||||
// rpc Hostname
|
// rpc Hostname
|
||||||
|
|
||||||
message HostnameResponse {
|
message HostnameResponse { repeated Hostname messages = 1; }
|
||||||
repeated Hostname messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message Hostname {
|
message Hostname {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -510,9 +514,7 @@ message Hostname {
|
|||||||
|
|
||||||
// rpc LoadAvg
|
// rpc LoadAvg
|
||||||
|
|
||||||
message LoadAvgResponse {
|
message LoadAvgResponse { repeated LoadAvg messages = 1; }
|
||||||
repeated LoadAvg messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message LoadAvg {
|
message LoadAvg {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -521,12 +523,9 @@ message LoadAvg {
|
|||||||
double load15 = 4;
|
double load15 = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// rpc SystemStat
|
// rpc SystemStat
|
||||||
|
|
||||||
message SystemStatResponse {
|
message SystemStatResponse { repeated SystemStat messages = 1; }
|
||||||
repeated SystemStat messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message SystemStat {
|
message SystemStat {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -569,12 +568,9 @@ message SoftIRQStat {
|
|||||||
uint64 rcu = 10;
|
uint64 rcu = 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// rpc CPUInfo
|
// rpc CPUInfo
|
||||||
|
|
||||||
message CPUInfoResponse {
|
message CPUInfoResponse { repeated CPUsInfo messages = 1; }
|
||||||
repeated CPUsInfo messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message CPUsInfo {
|
message CPUsInfo {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -610,12 +606,9 @@ message CPUInfo {
|
|||||||
string power_management = 26;
|
string power_management = 26;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// rpc NetworkDeviceStats
|
// rpc NetworkDeviceStats
|
||||||
|
|
||||||
message NetworkDeviceStatsResponse {
|
message NetworkDeviceStatsResponse { repeated NetworkDeviceStats messages = 1; }
|
||||||
repeated NetworkDeviceStats messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message NetworkDeviceStats {
|
message NetworkDeviceStats {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -643,12 +636,9 @@ message NetDev {
|
|||||||
uint64 tx_compressed = 17;
|
uint64 tx_compressed = 17;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// rpc DiskStats
|
// rpc DiskStats
|
||||||
|
|
||||||
message DiskStatsResponse {
|
message DiskStatsResponse { repeated DiskStats messages = 1; }
|
||||||
repeated DiskStats messages = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DiskStats {
|
message DiskStats {
|
||||||
common.Metadata metadata = 1;
|
common.Metadata metadata = 1;
|
||||||
@ -674,3 +664,23 @@ message DiskStat {
|
|||||||
uint64 discard_sectors = 15;
|
uint64 discard_sectors = 15;
|
||||||
uint64 discard_time_ms = 16;
|
uint64 discard_time_ms = 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rpc EtcdLeaveCluster

// EtcdLeaveClusterRequest carries no parameters.
message EtcdLeaveClusterRequest {
}

// EtcdLeaveCluster is a single reply to an EtcdLeaveCluster call; it holds
// only the common response metadata.
message EtcdLeaveCluster {
  common.Metadata metadata = 1;
}

// EtcdLeaveClusterResponse wraps the EtcdLeaveCluster replies.
message EtcdLeaveClusterResponse {
  repeated EtcdLeaveCluster messages = 1;
}
|
||||||
|
|
||||||
|
// rpc EtcdForfeitLeadership

// EtcdForfeitLeadershipRequest carries no parameters.
message EtcdForfeitLeadershipRequest {
}

// EtcdForfeitLeadership is a single reply to an EtcdForfeitLeadership call.
message EtcdForfeitLeadership {
  common.Metadata metadata = 1;
  // member is the name of the etcd member leadership was moved to; it is
  // empty when no leadership transfer took place.
  string member = 2;
}

// EtcdForfeitLeadershipResponse wraps the EtcdForfeitLeadership replies.
message EtcdForfeitLeadershipResponse {
  repeated EtcdForfeitLeadership messages = 1;
}
|
||||||
|
|
||||||
|
// rpc EtcdMemberList

// EtcdMemberListRequest carries no parameters.
message EtcdMemberListRequest {
}

// EtcdMemberList is a single reply to an EtcdMemberList call.
message EtcdMemberList {
  common.Metadata metadata = 1;
  // members holds the names of the etcd cluster members.
  repeated string members = 2;
}

// EtcdMemberListResponse wraps the EtcdMemberList replies.
message EtcdMemberListResponse {
  repeated EtcdMemberList messages = 1;
}
|
||||||
|
@ -14,7 +14,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/kubernetes-sigs/bootkube/pkg/recovery"
|
"github.com/kubernetes-sigs/bootkube/pkg/recovery"
|
||||||
"go.etcd.io/etcd/clientv3"
|
|
||||||
k8saes "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
|
k8saes "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
|
||||||
|
|
||||||
"github.com/talos-systems/talos/internal/pkg/etcd"
|
"github.com/talos-systems/talos/internal/pkg/etcd"
|
||||||
@ -40,7 +39,7 @@ func recoverAssets(config config.Provider) error {
|
|||||||
|
|
||||||
switch *recoverSource {
|
switch *recoverSource {
|
||||||
case machineapi.RecoverRequest_ETCD.String():
|
case machineapi.RecoverRequest_ETCD.String():
|
||||||
var client *clientv3.Client
|
var client *etcd.Client
|
||||||
|
|
||||||
client, err = etcd.NewClient([]string{"127.0.0.1:2379"})
|
client, err = etcd.NewClient([]string{"127.0.0.1:2379"})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -54,7 +53,7 @@ func recoverAssets(config config.Provider) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
backend = recovery.NewEtcdBackendWithTransformer(client, "/registry", transform)
|
backend = recovery.NewEtcdBackendWithTransformer(client.Client, "/registry", transform)
|
||||||
case machineapi.RecoverRequest_APISERVER.String():
|
case machineapi.RecoverRequest_APISERVER.String():
|
||||||
backend, err = recovery.NewAPIServerBackend(constants.RecoveryKubeconfig)
|
backend, err = recovery.NewAPIServerBackend(constants.RecoveryKubeconfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -240,7 +240,12 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
|
|||||||
return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err)
|
return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = etcd.ValidateForUpgrade(s.Controller.Runtime().Config(), in.GetPreserve()); err != nil {
|
client, err := etcd.NewClientFromControlPlaneIPs(ctx, s.Controller.Runtime().Config().Cluster().CA(), s.Controller.Runtime().Config().Cluster().Endpoint())
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = client.ValidateForUpgrade(ctx, s.Controller.Runtime().Config(), in.GetPreserve()); err != nil {
|
||||||
return nil, fmt.Errorf("error validating etcd for upgrade: %w", err)
|
return nil, fmt.Errorf("error validating etcd for upgrade: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1200,3 +1205,84 @@ func (s *Server) Memory(ctx context.Context, in *empty.Empty) (reply *machine.Me
|
|||||||
|
|
||||||
return reply, err
|
return reply, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EtcdMemberList implements the machine.MachineServer interface.
|
||||||
|
func (s *Server) EtcdMemberList(ctx context.Context, in *machine.EtcdMemberListRequest) (reply *machine.EtcdMemberListResponse, err error) {
|
||||||
|
client, err := etcd.NewClientFromControlPlaneIPs(ctx, s.Controller.Runtime().Config().Cluster().CA(), s.Controller.Runtime().Config().Cluster().Endpoint())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// nolint: errcheck
|
||||||
|
defer client.Close()
|
||||||
|
|
||||||
|
resp, err := client.MemberList(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
members := make([]string, 0, len(resp.Members))
|
||||||
|
|
||||||
|
for _, member := range resp.Members {
|
||||||
|
members = append(members, member.GetName())
|
||||||
|
}
|
||||||
|
|
||||||
|
reply = &machine.EtcdMemberListResponse{
|
||||||
|
Messages: []*machine.EtcdMemberList{
|
||||||
|
{
|
||||||
|
Members: members,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return reply, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// EtcdLeaveCluster implements the machine.MachineServer interface.
|
||||||
|
func (s *Server) EtcdLeaveCluster(ctx context.Context, in *machine.EtcdLeaveClusterRequest) (reply *machine.EtcdLeaveClusterResponse, err error) {
|
||||||
|
client, err := etcd.NewClientFromControlPlaneIPs(ctx, s.Controller.Runtime().Config().Cluster().CA(), s.Controller.Runtime().Config().Cluster().Endpoint())
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// nolint: errcheck
|
||||||
|
defer client.Close()
|
||||||
|
|
||||||
|
if err = client.LeaveCluster(ctx); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to leave cluster: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reply = &machine.EtcdLeaveClusterResponse{
|
||||||
|
Messages: []*machine.EtcdLeaveCluster{
|
||||||
|
{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return reply, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// EtcdForfeitLeadership implements the machine.MachineServer interface.
|
||||||
|
func (s *Server) EtcdForfeitLeadership(ctx context.Context, in *machine.EtcdForfeitLeadershipRequest) (reply *machine.EtcdForfeitLeadershipResponse, err error) {
|
||||||
|
client, err := etcd.NewClientFromControlPlaneIPs(ctx, s.Controller.Runtime().Config().Cluster().CA(), s.Controller.Runtime().Config().Cluster().Endpoint())
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// nolint: errcheck
|
||||||
|
defer client.Close()
|
||||||
|
|
||||||
|
leader, err := client.ForfeitLeadership(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to forfeit leadership: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reply = &machine.EtcdForfeitLeadershipResponse{
|
||||||
|
Messages: []*machine.EtcdForfeitLeadership{
|
||||||
|
{
|
||||||
|
Member: leader,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return reply, nil
|
||||||
|
}
|
||||||
|
@ -1160,53 +1160,16 @@ func UncordonNode(seq runtime.Sequence, data interface{}) (runtime.TaskExecution
|
|||||||
// nolint: gocyclo
|
// nolint: gocyclo
|
||||||
func LeaveEtcd(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
|
func LeaveEtcd(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
|
||||||
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {
|
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {
|
||||||
hostname, err := os.Hostname()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
client, err := etcd.NewClientFromControlPlaneIPs(ctx, r.Config().Cluster().CA(), r.Config().Cluster().Endpoint())
|
client, err := etcd.NewClientFromControlPlaneIPs(ctx, r.Config().Cluster().CA(), r.Config().Cluster().Endpoint())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to create etcd client: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// nolint: errcheck
|
// nolint: errcheck
|
||||||
defer client.Close()
|
defer client.Close()
|
||||||
|
|
||||||
resp, err := client.MemberList(ctx)
|
if err = client.LeaveCluster(ctx); err != nil {
|
||||||
if err != nil {
|
return fmt.Errorf("failed to leave cluster: %w", err)
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
var id *uint64
|
|
||||||
|
|
||||||
for _, member := range resp.Members {
|
|
||||||
if member.Name == hostname {
|
|
||||||
member := member
|
|
||||||
id = &member.ID
|
|
||||||
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == nil {
|
|
||||||
return fmt.Errorf("failed to find %q in list of etcd members", hostname)
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.Println("leaving etcd cluster")
|
|
||||||
|
|
||||||
_, err = client.MemberRemove(ctx, *id)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = system.Services(nil).Stop(ctx, "etcd"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Once the member is removed, the data is no longer valid.
|
|
||||||
if err = os.RemoveAll(constants.EtcdDataPath); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
139
internal/integration/api/etcd.go
Normal file
139
internal/integration/api/etcd.go
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
// +build integration_api
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/golang/protobuf/ptypes/empty"
|
||||||
|
"google.golang.org/grpc/codes"
|
||||||
|
"google.golang.org/grpc/status"
|
||||||
|
|
||||||
|
"github.com/talos-systems/talos/internal/integration/base"
|
||||||
|
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
|
||||||
|
"github.com/talos-systems/talos/pkg/machinery/client"
|
||||||
|
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
|
||||||
|
"github.com/talos-systems/talos/pkg/machinery/constants"
|
||||||
|
)
|
||||||
|
|
||||||
|
type EtcdSuite struct {
|
||||||
|
base.APISuite
|
||||||
|
|
||||||
|
ctx context.Context
|
||||||
|
ctxCancel context.CancelFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// SuiteName ...
|
||||||
|
func (suite *EtcdSuite) SuiteName() string {
|
||||||
|
return "api.EtcdSuite"
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetupTest ...
|
||||||
|
func (suite *EtcdSuite) SetupTest() {
|
||||||
|
if testing.Short() {
|
||||||
|
suite.T().Skip("skipping in short mode")
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure we abort at some point in time, but give enough room for Etcds
|
||||||
|
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TearDownTest ...
|
||||||
|
func (suite *EtcdSuite) TearDownTest() {
|
||||||
|
if suite.ctxCancel != nil {
|
||||||
|
suite.ctxCancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEtcdForfeitLeadership tests moving etcd leadership to another member.
|
||||||
|
func (suite *EtcdSuite) TestEtcdForfeitLeadership() {
|
||||||
|
if suite.Cluster == nil {
|
||||||
|
suite.T().Skip("without full cluster state etcd test is not reliable (can't wait for cluster readiness in between resets)")
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes := suite.DiscoverNodes().NodesByType(machine.TypeControlPlane)
|
||||||
|
|
||||||
|
var leader string
|
||||||
|
|
||||||
|
for _, node := range nodes {
|
||||||
|
resp, err := suite.Client.MachineClient.EtcdForfeitLeadership(client.WithNodes(suite.ctx, node), &machineapi.EtcdForfeitLeadershipRequest{})
|
||||||
|
suite.Require().NoError(err)
|
||||||
|
|
||||||
|
suite.Assert().Empty(resp.Messages[0].Metadata.Error, "node: %s", node)
|
||||||
|
|
||||||
|
if resp.Messages[0].GetMember() != "" {
|
||||||
|
leader = resp.Messages[0].GetMember()
|
||||||
|
|
||||||
|
suite.T().Log("Moved leadership to", leader)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
suite.Assert().NotEmpty(leader)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEtcdLeaveCluster tests removing an etcd member.
|
||||||
|
func (suite *EtcdSuite) TestEtcdLeaveCluster() {
|
||||||
|
if !suite.Capabilities().SupportsReboot {
|
||||||
|
suite.T().Skip("cluster doesn't support reboot (and reset)")
|
||||||
|
}
|
||||||
|
|
||||||
|
if suite.Cluster == nil {
|
||||||
|
suite.T().Skip("without full cluster state reset test is not reliable (can't wait for cluster readiness in between resets)")
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes := suite.DiscoverNodes().NodesByType(machine.TypeControlPlane)
|
||||||
|
|
||||||
|
node := nodes[2]
|
||||||
|
|
||||||
|
suite.T().Log("Removing etcd member", node)
|
||||||
|
|
||||||
|
nodeCtx := client.WithNodes(suite.ctx, node)
|
||||||
|
|
||||||
|
_, err := suite.Client.MachineClient.EtcdForfeitLeadership(nodeCtx, &machineapi.EtcdForfeitLeadershipRequest{})
|
||||||
|
suite.Require().NoError(err)
|
||||||
|
|
||||||
|
_, err = suite.Client.MachineClient.EtcdLeaveCluster(nodeCtx, &machineapi.EtcdLeaveClusterRequest{})
|
||||||
|
suite.Require().NoError(err)
|
||||||
|
|
||||||
|
services, err := suite.Client.MachineClient.ServiceList(nodeCtx, &empty.Empty{})
|
||||||
|
suite.Require().NoError(err)
|
||||||
|
|
||||||
|
for _, service := range services.Messages[0].GetServices() {
|
||||||
|
if service.Id == "etcd" && service.State != "Finished" {
|
||||||
|
suite.Assert().Equal("Finished", service.State)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stream, err := suite.Client.MachineClient.List(nodeCtx, &machineapi.ListRequest{Root: constants.EtcdDataPath})
|
||||||
|
suite.Require().NoError(err)
|
||||||
|
|
||||||
|
for {
|
||||||
|
info, err := stream.Recv()
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF || status.Code(err) == codes.Canceled {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
suite.Assert().Equal("rpc error: code = Unknown desc = lstat /var/lib/etcd: no such file or directory", info.Metadata.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NB: Reboot the node so that it can rejoin the etcd cluster. This allows us
|
||||||
|
// to check the cluster health and catch any issues in rejoining.
|
||||||
|
suite.AssertRebooted(suite.ctx, node, func(nodeCtx context.Context) error {
|
||||||
|
_, err = suite.Client.MachineClient.Reboot(nodeCtx, &empty.Empty{})
|
||||||
|
|
||||||
|
return err
|
||||||
|
}, 10*time.Minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
allSuites = append(allSuites, new(EtcdSuite))
|
||||||
|
}
|
@ -7,10 +7,13 @@ package etcd
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"go.etcd.io/etcd/clientv3"
|
"go.etcd.io/etcd/clientv3"
|
||||||
|
"go.etcd.io/etcd/etcdserver/etcdserverpb"
|
||||||
"go.etcd.io/etcd/pkg/transport"
|
"go.etcd.io/etcd/pkg/transport"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
|
|
||||||
@ -18,15 +21,21 @@ import (
|
|||||||
|
|
||||||
"github.com/talos-systems/net"
|
"github.com/talos-systems/net"
|
||||||
|
|
||||||
|
"github.com/talos-systems/talos/internal/app/machined/pkg/system"
|
||||||
"github.com/talos-systems/talos/pkg/kubernetes"
|
"github.com/talos-systems/talos/pkg/kubernetes"
|
||||||
"github.com/talos-systems/talos/pkg/machinery/config"
|
"github.com/talos-systems/talos/pkg/machinery/config"
|
||||||
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
|
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
|
||||||
"github.com/talos-systems/talos/pkg/machinery/constants"
|
"github.com/talos-systems/talos/pkg/machinery/constants"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Client is a wrapper around the official etcd client.
|
||||||
|
type Client struct {
|
||||||
|
*clientv3.Client
|
||||||
|
}
|
||||||
|
|
||||||
// NewClient initializes and returns an etcd client configured to talk to
|
// NewClient initializes and returns an etcd client configured to talk to
|
||||||
// a local endpoint.
|
// a local endpoint.
|
||||||
func NewClient(endpoints []string) (client *clientv3.Client, err error) {
|
func NewClient(endpoints []string) (client *Client, err error) {
|
||||||
tlsInfo := transport.TLSInfo{
|
tlsInfo := transport.TLSInfo{
|
||||||
CertFile: constants.KubernetesEtcdPeerCert,
|
CertFile: constants.KubernetesEtcdPeerCert,
|
||||||
KeyFile: constants.KubernetesEtcdPeerKey,
|
KeyFile: constants.KubernetesEtcdPeerKey,
|
||||||
@ -38,7 +47,7 @@ func NewClient(endpoints []string) (client *clientv3.Client, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
client, err = clientv3.New(clientv3.Config{
|
c, err := clientv3.New(clientv3.Config{
|
||||||
Endpoints: endpoints,
|
Endpoints: endpoints,
|
||||||
DialTimeout: 5 * time.Second,
|
DialTimeout: 5 * time.Second,
|
||||||
DialOptions: []grpc.DialOption{grpc.WithBlock()},
|
DialOptions: []grpc.DialOption{grpc.WithBlock()},
|
||||||
@ -48,12 +57,12 @@ func NewClient(endpoints []string) (client *clientv3.Client, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return client, nil
|
return &Client{Client: c}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewClientFromControlPlaneIPs initializes and returns an etcd client
|
// NewClientFromControlPlaneIPs initializes and returns an etcd client
|
||||||
// configured to talk to all members.
|
// configured to talk to all members.
|
||||||
func NewClientFromControlPlaneIPs(ctx context.Context, creds *x509.PEMEncodedCertificateAndKey, endpoint *url.URL) (client *clientv3.Client, err error) {
|
func NewClientFromControlPlaneIPs(ctx context.Context, creds *x509.PEMEncodedCertificateAndKey, endpoint *url.URL) (client *Client, err error) {
|
||||||
h, err := kubernetes.NewTemporaryClientFromPKI(creds, endpoint)
|
h, err := kubernetes.NewTemporaryClientFromPKI(creds, endpoint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -75,17 +84,9 @@ func NewClientFromControlPlaneIPs(ctx context.Context, creds *x509.PEMEncodedCer
|
|||||||
|
|
||||||
// ValidateForUpgrade validates the etcd cluster state to ensure that performing
|
// ValidateForUpgrade validates the etcd cluster state to ensure that performing
|
||||||
// an upgrade is safe.
|
// an upgrade is safe.
|
||||||
func ValidateForUpgrade(config config.Provider, preserve bool) error {
|
func (c *Client) ValidateForUpgrade(ctx context.Context, config config.Provider, preserve bool) error {
|
||||||
if config.Machine().Type() != machine.TypeJoin {
|
if config.Machine().Type() != machine.TypeJoin {
|
||||||
client, err := NewClientFromControlPlaneIPs(context.TODO(), config.Cluster().CA(), config.Cluster().Endpoint())
|
resp, err := c.MemberList(context.Background())
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// nolint: errcheck
|
|
||||||
defer client.Close()
|
|
||||||
|
|
||||||
resp, err := client.MemberList(context.Background())
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -106,3 +107,112 @@ func ValidateForUpgrade(config config.Provider, preserve bool) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LeaveCluster removes the current member from the etcd cluster.
|
||||||
|
//
|
||||||
|
// nolint: gocyclo
|
||||||
|
func (c *Client) LeaveCluster(ctx context.Context) error {
|
||||||
|
hostname, err := os.Hostname()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.MemberList(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var id *uint64
|
||||||
|
|
||||||
|
for _, member := range resp.Members {
|
||||||
|
if member.Name == hostname {
|
||||||
|
member := member
|
||||||
|
id = &member.ID
|
||||||
|
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if id == nil {
|
||||||
|
return fmt.Errorf("failed to find %q in list of etcd members", hostname)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = c.MemberRemove(ctx, *id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to remove member %d: %w", *id, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = system.Services(nil).Stop(ctx, "etcd"); err != nil {
|
||||||
|
return fmt.Errorf("failed to stop etcd: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once the member is removed, the data is no longer valid.
|
||||||
|
if err = os.RemoveAll(constants.EtcdDataPath); err != nil {
|
||||||
|
return fmt.Errorf("failed to remove %s: %w", constants.EtcdDataPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ForfeitLeadership transfers leadership from the current member to another
|
||||||
|
// member.
|
||||||
|
//
|
||||||
|
// nolint: gocyclo
|
||||||
|
func (c *Client) ForfeitLeadership(ctx context.Context) (string, error) {
|
||||||
|
hostname, err := os.Hostname()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to get hostname: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.MemberList(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to list etcd members: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(resp.Members) == 1 {
|
||||||
|
return "", fmt.Errorf("cannot forfeit leadership, only one member")
|
||||||
|
}
|
||||||
|
|
||||||
|
var member *etcdserverpb.Member
|
||||||
|
|
||||||
|
for _, m := range resp.Members {
|
||||||
|
if m.Name == hostname {
|
||||||
|
member = m
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if member == nil {
|
||||||
|
return "", fmt.Errorf("failed to find %q in list of etcd members", hostname)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ep := range member.GetClientURLs() {
|
||||||
|
var status *clientv3.StatusResponse
|
||||||
|
|
||||||
|
status, err = c.Status(ctx, ep)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if status.Leader != member.GetID() {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range resp.Members {
|
||||||
|
if m.GetID() != member.GetID() {
|
||||||
|
log.Printf("moving leadership from %q to %q", member.GetName(), m.GetName())
|
||||||
|
|
||||||
|
c.SetEndpoints(ep)
|
||||||
|
|
||||||
|
_, err = c.MoveLeader(ctx, m.GetID())
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return m.GetName(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user