feat: implement etcd maintenance commands

This allows safe recovery from out-of-space quota issues, and performing
defragmentation as needed.

`talosctl etcd status` command provides lots of information about the
cluster health.

See docs for more details.

Fixes #4889

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2023-01-03 17:49:30 +04:00
parent 80fed31940
commit 96629d5ba6
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
14 changed files with 4759 additions and 775 deletions

View File

@ -51,6 +51,25 @@ service MachineService {
//
// This method is available only on control plane nodes (which run etcd).
rpc EtcdSnapshot(EtcdSnapshotRequest) returns (stream common.Data);
// EtcdAlarmList lists etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
rpc EtcdAlarmList(google.protobuf.Empty) returns (EtcdAlarmListResponse);
// EtcdAlarmDisarm disarms etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
rpc EtcdAlarmDisarm(google.protobuf.Empty) returns (EtcdAlarmDisarmResponse);
// EtcdDefragment defragments etcd data directory for the current node.
//
// Defragmentation is a resource-heavy operation, so it should only run on a specific
// node.
//
// This method is available only on control plane nodes (which run etcd).
rpc EtcdDefragment(google.protobuf.Empty) returns (EtcdDefragmentResponse);
// EtcdStatus returns etcd status for the current member.
//
// This method is available only on control plane nodes (which run etcd).
rpc EtcdStatus(google.protobuf.Empty) returns (EtcdStatusResponse);
rpc GenerateConfiguration(GenerateConfigurationRequest) returns (GenerateConfigurationResponse);
rpc Hostname(google.protobuf.Empty) returns (HostnameResponse);
rpc Kubeconfig(google.protobuf.Empty) returns (stream common.Data);
@ -982,6 +1001,64 @@ message EtcdRecoverResponse {
repeated EtcdRecover messages = 1;
}
// EtcdAlarmListResponse is the response type of the EtcdAlarmList RPC.
message EtcdAlarmListResponse {
// Replies to the request, each carrying its own metadata and member alarms.
repeated EtcdAlarm messages = 1;
}
// EtcdAlarm is a single reply to the EtcdAlarmList RPC.
message EtcdAlarm {
// Common reply metadata; talosctl reads the hostname from it to fill the NODE column.
common.Metadata metadata = 1;
// Alarms reported by etcd, one entry per (member, alarm type) pair.
repeated EtcdMemberAlarm member_alarms = 2;
}
// EtcdMemberAlarm describes one alarm raised for one etcd member.
message EtcdMemberAlarm {
// AlarmType is the kind of alarm; the server maps etcd's own alarm types
// onto these values and falls back to NONE for anything it does not recognize.
enum AlarmType {
NONE = 0;
NOSPACE = 1;
CORRUPT = 2;
}
// ID of the etcd member the alarm belongs to.
uint64 member_id = 1;
// Kind of the alarm.
AlarmType alarm = 2;
}
// EtcdAlarmDisarmResponse is the response type of the EtcdAlarmDisarm RPC.
message EtcdAlarmDisarmResponse {
// Replies to the request, each carrying its own metadata and member alarms.
repeated EtcdAlarmDisarm messages = 1;
}
// EtcdAlarmDisarm is a single reply to the EtcdAlarmDisarm RPC.
message EtcdAlarmDisarm {
// Common reply metadata; talosctl reads the hostname from it to fill the NODE column.
common.Metadata metadata = 1;
// Alarms returned by etcd for the disarm request.
repeated EtcdMemberAlarm member_alarms = 2;
}
// EtcdDefragmentResponse is the response type of the EtcdDefragment RPC.
message EtcdDefragmentResponse {
// Replies to the request; each carries only metadata.
repeated EtcdDefragment messages = 1;
}
// EtcdDefragment is a single reply to the EtcdDefragment RPC; it has no payload besides metadata.
message EtcdDefragment {
// Common reply metadata.
common.Metadata metadata = 1;
}
// EtcdStatusResponse is the response type of the EtcdStatus RPC.
message EtcdStatusResponse {
// Replies to the request, each carrying its own metadata and member status.
repeated EtcdStatus messages = 1;
}
// EtcdStatus is a single reply to the EtcdStatus RPC.
message EtcdStatus {
// Common reply metadata; talosctl reads the hostname from it to fill the NODE column.
common.Metadata metadata = 1;
// Status of the etcd member that answered the request.
EtcdMemberStatus member_status = 2;
}
// EtcdMemberStatus is the status of a single etcd member; the server fills it
// from the etcd status response.
message EtcdMemberStatus {
// ID of the member which produced the status (taken from the etcd response header).
uint64 member_id = 10;
// Filled from the Version field of the etcd status response.
string protocol_version = 1;
// Database size; talosctl renders it as a byte count.
int64 db_size = 2;
// Part of the database that is in use; talosctl renders it as a byte count and as a percentage of db_size.
int64 db_size_in_use = 3;
// Member ID of the current leader as seen by this member.
uint64 leader = 4;
// Raft index reported by the member.
uint64 raft_index = 5;
// Raft term reported by the member.
uint64 raft_term = 6;
// Raft applied index reported by the member.
uint64 raft_applied_index = 7;
// Errors reported by the member; talosctl prints them in the ERRORS column.
repeated string errors = 8;
// Whether the member is a learner.
bool is_learner = 9;
}
// rpc generateConfiguration
message RouteConfig {

View File

@ -14,6 +14,8 @@ import (
"sync"
"text/tabwriter"
"github.com/dustin/go-humanize"
"github.com/siderolabs/gen/slices"
"github.com/spf13/cobra"
snapshot "go.etcd.io/etcd/etcdutl/v3/snapshot"
"google.golang.org/grpc/codes"
@ -21,6 +23,7 @@ import (
"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/siderolabs/talos/pkg/cli"
"github.com/siderolabs/talos/pkg/logging"
"github.com/siderolabs/talos/pkg/machinery/api/common"
"github.com/siderolabs/talos/pkg/machinery/api/machine"
"github.com/siderolabs/talos/pkg/machinery/client"
etcdresource "github.com/siderolabs/talos/pkg/machinery/resources/etcd"
@ -33,12 +36,127 @@ var etcdCmd = &cobra.Command{
Long: ``,
}
// etcdAlarmCmd represents the etcd alarm command.
//
// It has no run function of its own: it only groups the alarm subcommands
// (list and disarm), which are attached to it in init.
var etcdAlarmCmd = &cobra.Command{
Use: "alarm",
Short: "Manage etcd alarms",
Long: ``,
}
// alarmMessage is the part of an alarm reply that displayAlarms needs: the reply
// metadata and the per-member alarms.
//
// Both *machine.EtcdAlarm and *machine.EtcdAlarmDisarm are passed as alarmMessage
// by the alarm list and alarm disarm commands.
type alarmMessage interface {
GetMetadata() *common.Metadata
GetMemberAlarms() []*machine.EtcdMemberAlarm
}
// displayAlarms prints the member alarms of all messages as a table on stdout.
//
// The table has MEMBER and ALARM columns, preceded by a NODE column when the
// node hostname is known at the moment the header is written. Nothing at all
// (not even the header) is printed when there are no alarms.
//
// It returns the error of flushing the tabwriter.
func displayAlarms(messages []alarmMessage) error {
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
	node := ""
	pattern := "%s\t%s\n"
	header := "MEMBER\tALARM"

	// The header is written lazily, right before the first alarm row, no matter
	// which message that row comes from. Keying this on the indexes of the first
	// message would skip the header (and leave the row pattern without the NODE
	// column) whenever the first message carries no alarms but a later one does.
	headerPrinted := false
	// withNode records whether the NODE column is part of the table, so that every
	// row gets exactly as many arguments as the pattern has verbs.
	withNode := false

	for _, message := range messages {
		if message.GetMetadata() != nil && message.GetMetadata().GetHostname() != "" {
			node = message.GetMetadata().GetHostname()
		}

		for _, alarm := range message.GetMemberAlarms() {
			if !headerPrinted {
				if node != "" {
					header = "NODE\t" + header
					pattern = "%s\t" + pattern
					withNode = true
				}

				fmt.Fprintln(w, header)

				headerPrinted = true
			}

			args := []interface{}{
				etcdresource.FormatMemberID(alarm.GetMemberId()),
				alarm.GetAlarm().String(),
			}

			if withNode {
				args = append([]interface{}{node}, args...)
			}

			fmt.Fprintf(w, pattern, args...)
		}
	}

	return w.Flush()
}
// etcdAlarmListCmd represents the etcd alarm list command.
//
// It fetches the alarms via the API client and prints them with displayAlarms.
// An error that still comes with a response is only reported as a warning.
var etcdAlarmListCmd = &cobra.Command{
	Use:   "list",
	Short: "List the etcd alarms for the node.",
	Long:  ``,
	RunE: func(_ *cobra.Command, _ []string) error {
		listAlarms := func(ctx context.Context, c *client.Client) error {
			resp, listErr := c.EtcdAlarmList(ctx)

			switch {
			case listErr != nil && resp == nil:
				// nothing usable came back
				return fmt.Errorf("error getting alarms: %w", listErr)
			case listErr != nil:
				// there is a response to show, so just warn
				cli.Warning("%s", listErr)
			}

			toAlarmMessage := func(v *machine.EtcdAlarm) alarmMessage { return v }

			return displayAlarms(slices.Map(resp.Messages, toAlarmMessage))
		}

		return WithClient(listAlarms)
	},
}
// etcdAlarmDisarmCmd represents the etcd alarm disarm command.
//
// It asks the API to disarm the alarms and prints the alarms returned in the
// response with displayAlarms. An error that still comes with a response is only
// reported as a warning.
var etcdAlarmDisarmCmd = &cobra.Command{
	Use:   "disarm",
	Short: "Disarm the etcd alarms for the node.",
	Long:  ``,
	RunE: func(_ *cobra.Command, _ []string) error {
		disarmAlarms := func(ctx context.Context, c *client.Client) error {
			resp, disarmErr := c.EtcdAlarmDisarm(ctx)

			switch {
			case disarmErr != nil && resp == nil:
				// nothing usable came back
				return fmt.Errorf("error disarming alarms: %w", disarmErr)
			case disarmErr != nil:
				// there is a response to show, so just warn
				cli.Warning("%s", disarmErr)
			}

			toAlarmMessage := func(v *machine.EtcdAlarmDisarm) alarmMessage { return v }

			return displayAlarms(slices.Map(resp.Messages, toAlarmMessage))
		}

		return WithClient(disarmAlarms)
	},
}
// etcdDefragCmd represents the etcd defrag command.
//
// The command refuses to run when helpers.FailIfMultiNodes reports an error, and
// otherwise issues a single defragment request, discarding the response body.
var etcdDefragCmd = &cobra.Command{
	Use:   "defrag",
	Short: "Defragment etcd database on the node",
	Long: `Defragmentation is a maintenance operation that releases unused space from the etcd database file.
Defragmentation is a resource heavy operation and should be performed only when necessary on a single node at a time.`,
	RunE: func(_ *cobra.Command, _ []string) error {
		defragment := func(ctx context.Context, c *client.Client) error {
			if nodesErr := helpers.FailIfMultiNodes(ctx, "etcd defrag"); nodesErr != nil {
				return nodesErr
			}

			if _, defragErr := c.EtcdDefragment(ctx); defragErr != nil {
				return defragErr
			}

			return nil
		}

		return WithClient(defragment)
	},
}
var etcdLeaveCmd = &cobra.Command{
Use: "leave",
Short: "Tell nodes to leave etcd cluster",
Long: ``,
RunE: func(cmd *cobra.Command, args []string) error {
return WithClient(func(ctx context.Context, c *client.Client) error {
if err := helpers.FailIfMultiNodes(ctx, "etcd leave"); err != nil {
return err
}
return c.EtcdLeaveCluster(ctx, &machine.EtcdLeaveClusterRequest{})
})
},
@ -146,6 +264,69 @@ var etcdMemberListCmd = &cobra.Command{
},
}
// etcdStatusCmd represents the etcd status command.
//
// It prints one table row per response message with the member ID, database size,
// in-use size (absolute and as a percentage of the database size), leader, raft
// counters, learner flag and errors. A NODE column is added when the first
// message carries a hostname.
var etcdStatusCmd = &cobra.Command{
	Use:   "status",
	Short: "Get the status of etcd cluster member",
	Long:  `Returns the status of etcd member on the node, use multiple nodes to get status of all members.`,
	RunE: func(_ *cobra.Command, _ []string) error {
		return WithClient(func(ctx context.Context, c *client.Client) error {
			resp, statusErr := c.EtcdStatus(ctx)
			if statusErr != nil {
				if resp == nil {
					return fmt.Errorf("error getting status: %w", statusErr)
				}

				// there is a response to show, so just warn
				cli.Warning("%s", statusErr)
			}

			table := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
			nodeName := ""
			rowFormat := "%s\t%s\t%s (%.2f%%)\t%s\t%d\t%d\t%d\t%v\t%s\n"
			columns := "MEMBER\tDB SIZE\tIN USE\tLEADER\tRAFT INDEX\tRAFT TERM\tRAFT APPLIED INDEX\tLEARNER\tERRORS"

			for idx, msg := range resp.Messages {
				if md := msg.Metadata; md != nil && md.Hostname != "" {
					nodeName = md.Hostname
				}

				// the table layout is fixed when the first message is seen
				if idx == 0 {
					if nodeName != "" {
						columns = "NODE\t" + columns
						rowFormat = "%s\t" + rowFormat
					}

					fmt.Fprintln(table, columns)
				}

				status := msg.GetMemberStatus()
				dbSize := status.GetDbSize()
				dbSizeInUse := status.GetDbSizeInUse()

				// stays 0 for an empty database to avoid dividing by zero
				usedPercent := 0.0
				if dbSize > 0 {
					usedPercent = float64(dbSizeInUse) / float64(dbSize) * 100.0
				}

				row := make([]interface{}, 0, 11)

				if nodeName != "" {
					row = append(row, nodeName)
				}

				row = append(row,
					etcdresource.FormatMemberID(status.GetMemberId()),
					humanize.Bytes(uint64(dbSize)),
					humanize.Bytes(uint64(dbSizeInUse)),
					usedPercent,
					etcdresource.FormatMemberID(status.GetLeader()),
					status.GetRaftIndex(),
					status.GetRaftTerm(),
					status.GetRaftAppliedIndex(),
					status.GetIsLearner(),
					strings.Join(status.GetErrors(), ", "),
				)

				fmt.Fprintf(table, rowFormat, row...)
			}

			return table.Flush()
		})
	},
}
var etcdSnapshotCmd = &cobra.Command{
Use: "snapshot <path>",
Short: "Stream snapshot of the etcd node to the path.",
@ -228,6 +409,21 @@ var etcdSnapshotCmd = &cobra.Command{
}
func init() {
etcdCmd.AddCommand(etcdLeaveCmd, etcdForfeitLeadershipCmd, etcdMemberListCmd, etcdMemberRemoveCmd, etcdSnapshotCmd)
etcdAlarmCmd.AddCommand(
etcdAlarmListCmd,
etcdAlarmDisarmCmd,
)
etcdCmd.AddCommand(
etcdAlarmCmd,
etcdDefragCmd,
etcdForfeitLeadershipCmd,
etcdLeaveCmd,
etcdMemberListCmd,
etcdMemberRemoveCmd,
etcdSnapshotCmd,
etcdStatusCmd,
)
addCommand(etcdCmd)
}

View File

@ -24,6 +24,19 @@ preface = """\
Talos is built with Go 1.19.4.
"""
[notes.etcd]
title = "etcd Maintenance"
description="""\
Talos adds new APIs to make it easier to perform etcd maintenance operations.
These APIs are available via new `talosctl etcd` sub-commands:
* `talosctl etcd alarm list|disarm`
* `talosctl etcd defrag`
* `talosctl etcd status`
See also [etcd maintenance guide](https://talos.dev/v1.4/advanced/etcd-maintenance/).
"""
[make_deps]

View File

@ -1952,6 +1952,160 @@ func (s *Server) EtcdRecover(srv machine.MachineService_EtcdRecoverServer) error
})
}
// mapAlarms converts etcd alarm members into their machine API representation.
//
// NOSPACE and CORRUPT are mapped one-to-one; every other alarm type (including
// NONE) becomes EtcdMemberAlarm_NONE.
func mapAlarms(alarms []*etcdserverpb.AlarmMember) []*machine.EtcdMemberAlarm {
	convert := func(src *etcdserverpb.AlarmMember) *machine.EtcdMemberAlarm {
		dst := &machine.EtcdMemberAlarm{
			MemberId: src.MemberID,
		}

		switch src.Alarm {
		case etcdserverpb.AlarmType_NOSPACE:
			dst.Alarm = machine.EtcdMemberAlarm_NOSPACE
		case etcdserverpb.AlarmType_CORRUPT:
			dst.Alarm = machine.EtcdMemberAlarm_CORRUPT
		default:
			dst.Alarm = machine.EtcdMemberAlarm_NONE
		}

		return dst
	}

	return slices.Map(alarms, convert)
}
// EtcdAlarmList lists etcd alarms for the current node.
//
// The request is rejected when the control plane check fails (etcd runs only on
// control plane nodes). Otherwise the alarm list is fetched through a local etcd
// client and returned as a single reply message.
func (s *Server) EtcdAlarmList(ctx context.Context, in *emptypb.Empty) (*machine.EtcdAlarmListResponse, error) {
	if cpErr := s.checkControlplane("etcd alarm list"); cpErr != nil {
		return nil, cpErr
	}

	etcdClient, err := etcd.NewLocalClient()
	if err != nil {
		return nil, fmt.Errorf("failed to create etcd client: %w", err)
	}

	//nolint:errcheck
	defer etcdClient.Close()

	alarmList, err := etcdClient.AlarmList(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list etcd alarms: %w", err)
	}

	reply := &machine.EtcdAlarm{
		MemberAlarms: mapAlarms(alarmList.Alarms),
	}

	return &machine.EtcdAlarmListResponse{
		Messages: []*machine.EtcdAlarm{reply},
	}, nil
}
// EtcdAlarmDisarm disarms etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd): the
// request is rejected when the control plane check fails.
//
// The disarm request is sent through a local etcd client with an empty
// AlarmMember and the alarms from the etcd response are returned as a single
// reply message.
// NOTE(review): an empty AlarmMember presumably means "disarm all alarms" — confirm
// against the etcd clientv3 documentation.
func (s *Server) EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty) (*machine.EtcdAlarmDisarmResponse, error) {
	// The operation name must identify this handler; every sibling etcd handler
	// passes its own name to the control plane check.
	if err := s.checkControlplane("etcd alarm disarm"); err != nil {
		return nil, err
	}

	client, err := etcd.NewLocalClient()
	if err != nil {
		return nil, fmt.Errorf("failed to create etcd client: %w", err)
	}

	//nolint:errcheck
	defer client.Close()

	resp, err := client.AlarmDisarm(ctx, &clientv3.AlarmMember{})
	if err != nil {
		return nil, fmt.Errorf("failed to disarm etcd alarm: %w", err)
	}

	return &machine.EtcdAlarmDisarmResponse{
		Messages: []*machine.EtcdAlarmDisarm{
			{
				MemberAlarms: mapAlarms(resp.Alarms),
			},
		},
	}, nil
}
// EtcdDefragment defragments etcd data directory for the current node.
//
// Defragmentation is a resource-heavy operation, so it should only run on a
// specific node.
//
// The request is rejected when the control plane check fails (etcd runs only on
// control plane nodes). The defragment call targets the etcd client port on
// localhost; on success a single empty reply message is returned.
func (s *Server) EtcdDefragment(ctx context.Context, in *emptypb.Empty) (*machine.EtcdDefragmentResponse, error) {
	if cpErr := s.checkControlplane("etcd defragment"); cpErr != nil {
		return nil, cpErr
	}

	etcdClient, err := etcd.NewLocalClient()
	if err != nil {
		return nil, fmt.Errorf("failed to create etcd client: %w", err)
	}

	//nolint:errcheck
	defer etcdClient.Close()

	endpoint := nethelpers.JoinHostPort("localhost", constants.EtcdClientPort)

	if _, err = etcdClient.Defragment(ctx, endpoint); err != nil {
		return nil, fmt.Errorf("failed to defragment etcd: %w", err)
	}

	reply := &machine.EtcdDefragment{}

	return &machine.EtcdDefragmentResponse{
		Messages: []*machine.EtcdDefragment{reply},
	}, nil
}
// EtcdStatus returns etcd status for the member of the cluster.
//
// The request is rejected when the control plane check fails (etcd runs only on
// control plane nodes). The status is queried from the etcd client port on
// localhost and copied field by field into a single reply message.
func (s *Server) EtcdStatus(ctx context.Context, in *emptypb.Empty) (*machine.EtcdStatusResponse, error) {
	if cpErr := s.checkControlplane("etcd status"); cpErr != nil {
		return nil, cpErr
	}

	etcdClient, err := etcd.NewLocalClient()
	if err != nil {
		return nil, fmt.Errorf("failed to create etcd client: %w", err)
	}

	//nolint:errcheck
	defer etcdClient.Close()

	endpoint := nethelpers.JoinHostPort("localhost", constants.EtcdClientPort)

	status, err := etcdClient.Status(ctx, endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to query etcd status: %w", err)
	}

	memberStatus := &machine.EtcdMemberStatus{
		MemberId:         status.Header.MemberId,
		ProtocolVersion:  status.Version,
		DbSize:           status.DbSize,
		DbSizeInUse:      status.DbSizeInUse,
		Leader:           status.Leader,
		RaftIndex:        status.RaftIndex,
		RaftTerm:         status.RaftTerm,
		RaftAppliedIndex: status.RaftAppliedIndex,
		Errors:           status.Errors,
		IsLearner:        status.IsLearner,
	}

	return &machine.EtcdStatusResponse{
		Messages: []*machine.EtcdStatus{
			{MemberStatus: memberStatus},
		},
	}, nil
}
// GenerateClientConfiguration implements the machine.MachineServer interface.
func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.GenerateClientConfigurationRequest) (*machine.GenerateClientConfigurationResponse, error) {
if s.Controller.Runtime().Config().Machine().Type() == machinetype.TypeWorker {

View File

@ -40,6 +40,9 @@ var rules = map[string]role.Set{
"/machine.MachineService/DiskStats": role.MakeSet(role.Admin, role.Reader),
"/machine.MachineService/DiskUsage": role.MakeSet(role.Admin, role.Reader),
"/machine.MachineService/Dmesg": role.MakeSet(role.Admin, role.Reader),
"/machine.MachineService/EtcdAlarmList": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdAlarmDisarm": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdDefragment": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdForfeitLeadership": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdLeaveCluster": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdMemberList": role.MakeSet(role.Admin, role.Reader),
@ -47,6 +50,7 @@ var rules = map[string]role.Set{
"/machine.MachineService/EtcdRemoveMember": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdRemoveMemberByID": role.MakeSet(role.Admin),
"/machine.MachineService/EtcdSnapshot": role.MakeSet(role.Admin, role.EtcdBackup),
"/machine.MachineService/EtcdStatus": role.MakeSet(role.Admin),
"/machine.MachineService/Events": role.MakeSet(role.Admin, role.Reader),
"/machine.MachineService/GenerateClientConfiguration": role.MakeSet(role.Admin),
"/machine.MachineService/GenerateConfiguration": role.MakeSet(role.Admin),

View File

@ -50,6 +50,13 @@ func (cliSuite *CLISuite) DiscoverNodeInternalIPs(ctx context.Context) []string
return mapNodeInfosToInternalIPs(nodes.Nodes())
}
// DiscoverNodeInternalIPsByType returns the internal IPs of the discovered Talos nodes
// which have the given machine type.
func (cliSuite *CLISuite) DiscoverNodeInternalIPsByType(ctx context.Context, machineType machine.Type) []string {
	return mapNodeInfosToInternalIPs(cliSuite.DiscoverNodes(ctx).NodesByType(machineType))
}
// RandomDiscoveredNodeInternalIP returns the internal IP of a random node of the specified type (or any type if no types are specified).
//
//nolint:dupl

View File

@ -10,6 +10,7 @@ import (
"context"
"path/filepath"
"regexp"
"strings"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/machine"
@ -30,6 +31,21 @@ func (suite *EtcdSuite) TestMembers() {
suite.RunCLI([]string{"etcd", "members", "--nodes", suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)}) // default checks for stdout not empty
}
// TestStatus runs etcd status against all control plane nodes at once and expects some output.
func (suite *EtcdSuite) TestStatus() {
	controlPlaneIPs := strings.Join(
		suite.DiscoverNodeInternalIPsByType(context.TODO(), machine.TypeControlPlane),
		",",
	)

	// no extra options are passed: the default checks expect stdout to be not empty
	suite.RunCLI([]string{"etcd", "status", "--nodes", controlPlaneIPs})
}
// TestAlarm runs etcd alarm list and then etcd alarm disarm against one random
// control plane node; both are expected to print nothing to stdout.
func (suite *EtcdSuite) TestAlarm() {
	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)

	for _, subcommand := range []string{"list", "disarm"} {
		suite.RunCLI([]string{"etcd", "alarm", subcommand, "--nodes", node}, base.StdoutEmpty())
	}
}
// TestForfeitLeadership etcd forfeit-leadership check.
func (suite *EtcdSuite) TestForfeitLeadership() {
nodes := suite.DiscoverNodes(context.TODO()).NodesByType(machine.TypeControlPlane)

File diff suppressed because it is too large Load Diff

View File

@ -63,6 +63,25 @@ type MachineServiceClient interface {
//
// This method is available only on control plane nodes (which run etcd).
EtcdSnapshot(ctx context.Context, in *EtcdSnapshotRequest, opts ...grpc.CallOption) (MachineService_EtcdSnapshotClient, error)
// EtcdAlarmList lists etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdAlarmList(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmListResponse, error)
// EtcdAlarmDisarm disarms etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmDisarmResponse, error)
// EtcdDefragment defragments etcd data directory for the current node.
//
// Defragmentation is a resource-heavy operation, so it should only run on a specific
// node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdDefragment(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdDefragmentResponse, error)
// EtcdStatus returns etcd status for the current member.
//
// This method is available only on control plane nodes (which run etcd).
EtcdStatus(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdStatusResponse, error)
GenerateConfiguration(ctx context.Context, in *GenerateConfigurationRequest, opts ...grpc.CallOption) (*GenerateConfigurationResponse, error)
Hostname(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*HostnameResponse, error)
Kubeconfig(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (MachineService_KubeconfigClient, error)
@ -355,6 +374,42 @@ func (x *machineServiceEtcdSnapshotClient) Recv() (*common.Data, error) {
return m, nil
}
// EtcdAlarmList invokes "/machine.MachineService/EtcdAlarmList" on the client connection.
//
// It returns the decoded response, or nil and the error when the invocation fails.
func (c *machineServiceClient) EtcdAlarmList(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmListResponse, error) {
out := new(EtcdAlarmListResponse)
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdAlarmList", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// EtcdAlarmDisarm invokes "/machine.MachineService/EtcdAlarmDisarm" on the client connection.
//
// It returns the decoded response, or nil and the error when the invocation fails.
func (c *machineServiceClient) EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmDisarmResponse, error) {
out := new(EtcdAlarmDisarmResponse)
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdAlarmDisarm", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// EtcdDefragment invokes "/machine.MachineService/EtcdDefragment" on the client connection.
//
// It returns the decoded response, or nil and the error when the invocation fails.
func (c *machineServiceClient) EtcdDefragment(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdDefragmentResponse, error) {
out := new(EtcdDefragmentResponse)
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdDefragment", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// EtcdStatus invokes "/machine.MachineService/EtcdStatus" on the client connection.
//
// It returns the decoded response, or nil and the error when the invocation fails.
func (c *machineServiceClient) EtcdStatus(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdStatusResponse, error) {
out := new(EtcdStatusResponse)
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdStatus", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *machineServiceClient) GenerateConfiguration(ctx context.Context, in *GenerateConfigurationRequest, opts ...grpc.CallOption) (*GenerateConfigurationResponse, error) {
out := new(GenerateConfigurationResponse)
err := c.cc.Invoke(ctx, "/machine.MachineService/GenerateConfiguration", in, out, opts...)
@ -777,6 +832,25 @@ type MachineServiceServer interface {
//
// This method is available only on control plane nodes (which run etcd).
EtcdSnapshot(*EtcdSnapshotRequest, MachineService_EtcdSnapshotServer) error
// EtcdAlarmList lists etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdAlarmList(context.Context, *emptypb.Empty) (*EtcdAlarmListResponse, error)
// EtcdAlarmDisarm disarms etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdAlarmDisarm(context.Context, *emptypb.Empty) (*EtcdAlarmDisarmResponse, error)
// EtcdDefragment defragments etcd data directory for the current node.
//
// Defragmentation is a resource-heavy operation, so it should only run on a specific
// node.
//
// This method is available only on control plane nodes (which run etcd).
EtcdDefragment(context.Context, *emptypb.Empty) (*EtcdDefragmentResponse, error)
// EtcdStatus returns etcd status for the current member.
//
// This method is available only on control plane nodes (which run etcd).
EtcdStatus(context.Context, *emptypb.Empty) (*EtcdStatusResponse, error)
GenerateConfiguration(context.Context, *GenerateConfigurationRequest) (*GenerateConfigurationResponse, error)
Hostname(context.Context, *emptypb.Empty) (*HostnameResponse, error)
Kubeconfig(*emptypb.Empty, MachineService_KubeconfigServer) error
@ -858,6 +932,18 @@ func (UnimplementedMachineServiceServer) EtcdRecover(MachineService_EtcdRecoverS
// EtcdSnapshot is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) EtcdSnapshot(*EtcdSnapshotRequest, MachineService_EtcdSnapshotServer) error {
return status.Errorf(codes.Unimplemented, "method EtcdSnapshot not implemented")
}
// EtcdAlarmList is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) EtcdAlarmList(context.Context, *emptypb.Empty) (*EtcdAlarmListResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method EtcdAlarmList not implemented")
}
// EtcdAlarmDisarm is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) EtcdAlarmDisarm(context.Context, *emptypb.Empty) (*EtcdAlarmDisarmResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method EtcdAlarmDisarm not implemented")
}
// EtcdDefragment is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) EtcdDefragment(context.Context, *emptypb.Empty) (*EtcdDefragmentResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method EtcdDefragment not implemented")
}
// EtcdStatus is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) EtcdStatus(context.Context, *emptypb.Empty) (*EtcdStatusResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method EtcdStatus not implemented")
}
// GenerateConfiguration is the placeholder implementation: it always fails with codes.Unimplemented.
func (UnimplementedMachineServiceServer) GenerateConfiguration(context.Context, *GenerateConfigurationRequest) (*GenerateConfigurationResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method GenerateConfiguration not implemented")
}
@ -1242,6 +1328,78 @@ func (x *machineServiceEtcdSnapshotServer) Send(m *common.Data) error {
return x.ServerStream.SendMsg(m)
}
// _MachineService_EtcdAlarmList_Handler is the server-side handler for the EtcdAlarmList method.
//
// It decodes the request into an emptypb.Empty and, when no interceptor is set,
// calls the MachineServiceServer implementation directly. Otherwise the call goes
// through the interceptor, which receives the method info and a handler that
// performs the actual call.
func _MachineService_EtcdAlarmList_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(emptypb.Empty)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MachineServiceServer).EtcdAlarmList(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/machine.MachineService/EtcdAlarmList",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MachineServiceServer).EtcdAlarmList(ctx, req.(*emptypb.Empty))
}
return interceptor(ctx, in, info, handler)
}
// _MachineService_EtcdAlarmDisarm_Handler is the server-side handler for the EtcdAlarmDisarm method.
//
// It decodes the request into an emptypb.Empty and, when no interceptor is set,
// calls the MachineServiceServer implementation directly. Otherwise the call goes
// through the interceptor, which receives the method info and a handler that
// performs the actual call.
func _MachineService_EtcdAlarmDisarm_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(emptypb.Empty)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MachineServiceServer).EtcdAlarmDisarm(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/machine.MachineService/EtcdAlarmDisarm",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MachineServiceServer).EtcdAlarmDisarm(ctx, req.(*emptypb.Empty))
}
return interceptor(ctx, in, info, handler)
}
// _MachineService_EtcdDefragment_Handler is the server-side handler for the EtcdDefragment method.
//
// It decodes the request into an emptypb.Empty and, when no interceptor is set,
// calls the MachineServiceServer implementation directly. Otherwise the call goes
// through the interceptor, which receives the method info and a handler that
// performs the actual call.
func _MachineService_EtcdDefragment_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(emptypb.Empty)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MachineServiceServer).EtcdDefragment(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/machine.MachineService/EtcdDefragment",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MachineServiceServer).EtcdDefragment(ctx, req.(*emptypb.Empty))
}
return interceptor(ctx, in, info, handler)
}
// _MachineService_EtcdStatus_Handler is the server-side handler for the EtcdStatus method.
//
// It decodes the request into an emptypb.Empty and, when no interceptor is set,
// calls the MachineServiceServer implementation directly. Otherwise the call goes
// through the interceptor, which receives the method info and a handler that
// performs the actual call.
func _MachineService_EtcdStatus_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(emptypb.Empty)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MachineServiceServer).EtcdStatus(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/machine.MachineService/EtcdStatus",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MachineServiceServer).EtcdStatus(ctx, req.(*emptypb.Empty))
}
return interceptor(ctx, in, info, handler)
}
func _MachineService_GenerateConfiguration_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(GenerateConfigurationRequest)
if err := dec(in); err != nil {
@ -1793,6 +1951,22 @@ var MachineService_ServiceDesc = grpc.ServiceDesc{
MethodName: "EtcdForfeitLeadership",
Handler: _MachineService_EtcdForfeitLeadership_Handler,
},
{
MethodName: "EtcdAlarmList",
Handler: _MachineService_EtcdAlarmList_Handler,
},
{
MethodName: "EtcdAlarmDisarm",
Handler: _MachineService_EtcdAlarmDisarm_Handler,
},
{
MethodName: "EtcdDefragment",
Handler: _MachineService_EtcdDefragment_Handler,
},
{
MethodName: "EtcdStatus",
Handler: _MachineService_EtcdStatus_Handler,
},
{
MethodName: "GenerateConfiguration",
Handler: _MachineService_GenerateConfiguration_Handler,

File diff suppressed because it is too large Load Diff

View File

@ -839,6 +839,61 @@ func (c *Client) EtcdRecover(ctx context.Context, snapshot io.Reader, callOption
return resp, err
}
// EtcdAlarmList lists etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
//
// The raw reply and error are passed through FilterMessages before being returned.
func (c *Client) EtcdAlarmList(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdAlarmListResponse, error) {
	raw, callErr := c.MachineClient.EtcdAlarmList(ctx, &emptypb.Empty{}, opts...)

	filtered, err := FilterMessages(raw, callErr)

	// a failed assertion leaves the response nil
	resp, _ := filtered.(*machineapi.EtcdAlarmListResponse) //nolint:errcheck

	return resp, err
}
// EtcdAlarmDisarm disarms etcd alarms for the current node.
//
// This method is available only on control plane nodes (which run etcd).
//
// The raw reply and error are passed through FilterMessages before being returned.
func (c *Client) EtcdAlarmDisarm(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdAlarmDisarmResponse, error) {
	raw, callErr := c.MachineClient.EtcdAlarmDisarm(ctx, &emptypb.Empty{}, opts...)

	filtered, err := FilterMessages(raw, callErr)

	// a failed assertion leaves the response nil
	resp, _ := filtered.(*machineapi.EtcdAlarmDisarmResponse) //nolint:errcheck

	return resp, err
}
// EtcdDefragment defragments etcd data directory for the current node.
//
// Defragmentation is a resource-heavy operation, so it should only run on a
// specific node.
//
// This method is available only on control plane nodes (which run etcd).
//
// The raw reply and error are passed through FilterMessages before being returned.
func (c *Client) EtcdDefragment(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdDefragmentResponse, error) {
	raw, callErr := c.MachineClient.EtcdDefragment(ctx, &emptypb.Empty{}, opts...)

	filtered, err := FilterMessages(raw, callErr)

	// a failed assertion leaves the response nil
	resp, _ := filtered.(*machineapi.EtcdDefragmentResponse) //nolint:errcheck

	return resp, err
}
// EtcdStatus returns etcd status for the current member.
//
// This method is available only on control plane nodes (which run etcd).
//
// The raw reply and error are passed through FilterMessages before being returned.
func (c *Client) EtcdStatus(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdStatusResponse, error) {
	raw, callErr := c.MachineClient.EtcdStatus(ctx, &emptypb.Empty{}, opts...)

	filtered, err := FilterMessages(raw, callErr)

	// a failed assertion leaves the response nil
	resp, _ := filtered.(*machineapi.EtcdStatusResponse) //nolint:errcheck

	return resp, err
}
// GenerateClientConfiguration implements proto.MachineServiceClient interface.
func (c *Client) GenerateClientConfiguration(ctx context.Context, req *machineapi.GenerateClientConfigurationRequest, callOptions ...grpc.CallOption) (resp *machineapi.GenerateClientConfigurationResponse, err error) { //nolint:lll
resp, err = c.MachineClient.GenerateClientConfiguration(ctx, req, callOptions...)

View File

@ -0,0 +1,76 @@
---
title: "etcd Maintenance"
description: "Operational instructions for etcd database."
---
`etcd` database backs Kubernetes control plane state, so `etcd` health is critical for Kubernetes availability.
## Space Quota
`etcd` database space quota is set to 2 GiB by default.
If the database size exceeds the quota, `etcd` will stop operations until the issue is resolved.
This condition can be checked with `talosctl etcd alarm list` command:
```bash
$ talosctl -n <IP> etcd alarm list
NODE MEMBER ALARM
172.20.0.2 a49c021e76e707db NOSPACE
```
If the Kubernetes database contains lots of resources, space quota can be increased to match the actual usage.
The recommended maximum size is 8 GiB.
To increase the space quota, edit the `etcd` section in the machine configuration:
```yaml
machine:
etcd:
extraArgs:
quota-backend-bytes: 4294967296 # 4 GiB
```
Once the node is rebooted with the new configuration, use `talosctl etcd alarm disarm` to clear the `NOSPACE` alarm.
## Defragmentation
`etcd` database can become fragmented over time if there are lots of writes and deletes.
Kubernetes API server performs automatic compaction of the `etcd` database, which marks deleted space as free and ready to be reused.
However, the space is not actually freed until the database is defragmented.
If the database is heavily fragmented (in use/db size ratio is less than 0.5), defragmentation might increase the performance.
If the database runs over the space quota (see above), but the actual in use database size is small, defragmentation is required to bring the on-disk database size below the limit.
Current database size can be checked with `talosctl etcd status` command:
```bash
$ talosctl -n <CP1>,<CP2>,<CP3> etcd status
NODE MEMBER DB SIZE IN USE LEADER RAFT INDEX RAFT TERM RAFT APPLIED INDEX LEARNER ERRORS
172.20.0.3 ecebb05b59a776f1 21 MB 6.0 MB (29.08%) ecebb05b59a776f1 53391 4 53391 false
172.20.0.2 a49c021e76e707db 17 MB 4.5 MB (26.10%) ecebb05b59a776f1 53391 4 53391 false
172.20.0.4 eb47fb33e59bf0e2 20 MB 5.9 MB (28.96%) ecebb05b59a776f1 53391 4 53391 false
```
If any of the nodes are over database size quota, alarms will be printed in the `ERRORS` column.
To defragment the database, run `talosctl etcd defrag` command:
```bash
talosctl -n <CP1> etcd defrag
```
> Note: defragmentation is a resource-intensive operation, so it is recommended to run it on a single node at a time.
> Defragmentation of a live member blocks the system from reading and writing data while it rebuilds its state.
Once the defragmentation is complete, the database size will closely match the in use size:
```bash
$ talosctl -n <CP1> etcd status
NODE MEMBER DB SIZE IN USE LEADER RAFT INDEX RAFT TERM RAFT APPLIED INDEX LEARNER ERRORS
172.20.0.2 a49c021e76e707db 4.5 MB 4.5 MB (100.00%) ecebb05b59a776f1 56065 4 56065 false
```
## Snapshotting
Regular backups of `etcd` database should be performed to ensure that the cluster can be restored in case of a failure.
This procedure is described in the [disaster recovery]({{< relref "disaster-recovery" >}}) guide.

View File

@ -235,6 +235,12 @@ description: Talos gRPC API reference.
- [DiskUsageInfo](#machine.DiskUsageInfo)
- [DiskUsageRequest](#machine.DiskUsageRequest)
- [DmesgRequest](#machine.DmesgRequest)
- [EtcdAlarm](#machine.EtcdAlarm)
- [EtcdAlarmDisarm](#machine.EtcdAlarmDisarm)
- [EtcdAlarmDisarmResponse](#machine.EtcdAlarmDisarmResponse)
- [EtcdAlarmListResponse](#machine.EtcdAlarmListResponse)
- [EtcdDefragment](#machine.EtcdDefragment)
- [EtcdDefragmentResponse](#machine.EtcdDefragmentResponse)
- [EtcdForfeitLeadership](#machine.EtcdForfeitLeadership)
- [EtcdForfeitLeadershipRequest](#machine.EtcdForfeitLeadershipRequest)
- [EtcdForfeitLeadershipResponse](#machine.EtcdForfeitLeadershipResponse)
@ -242,8 +248,10 @@ description: Talos gRPC API reference.
- [EtcdLeaveClusterRequest](#machine.EtcdLeaveClusterRequest)
- [EtcdLeaveClusterResponse](#machine.EtcdLeaveClusterResponse)
- [EtcdMember](#machine.EtcdMember)
- [EtcdMemberAlarm](#machine.EtcdMemberAlarm)
- [EtcdMemberListRequest](#machine.EtcdMemberListRequest)
- [EtcdMemberListResponse](#machine.EtcdMemberListResponse)
- [EtcdMemberStatus](#machine.EtcdMemberStatus)
- [EtcdMembers](#machine.EtcdMembers)
- [EtcdRecover](#machine.EtcdRecover)
- [EtcdRecoverResponse](#machine.EtcdRecoverResponse)
@ -254,6 +262,8 @@ description: Talos gRPC API reference.
- [EtcdRemoveMemberRequest](#machine.EtcdRemoveMemberRequest)
- [EtcdRemoveMemberResponse](#machine.EtcdRemoveMemberResponse)
- [EtcdSnapshotRequest](#machine.EtcdSnapshotRequest)
- [EtcdStatus](#machine.EtcdStatus)
- [EtcdStatusResponse](#machine.EtcdStatusResponse)
- [Event](#machine.Event)
- [EventsRequest](#machine.EventsRequest)
- [FeaturesInfo](#machine.FeaturesInfo)
@ -344,6 +354,7 @@ description: Talos gRPC API reference.
- [VersionResponse](#machine.VersionResponse)
- [ApplyConfigurationRequest.Mode](#machine.ApplyConfigurationRequest.Mode)
- [EtcdMemberAlarm.AlarmType](#machine.EtcdMemberAlarm.AlarmType)
- [ListRequest.Type](#machine.ListRequest.Type)
- [MachineConfig.MachineType](#machine.MachineConfig.MachineType)
- [MachineStatusEvent.MachineStage](#machine.MachineStatusEvent.MachineStage)
@ -4109,6 +4120,98 @@ dmesg
<a name="machine.EtcdAlarm"></a>
### EtcdAlarm
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| metadata | [common.Metadata](#common.Metadata) | | |
| member_alarms | [EtcdMemberAlarm](#machine.EtcdMemberAlarm) | repeated | |
<a name="machine.EtcdAlarmDisarm"></a>
### EtcdAlarmDisarm
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| metadata | [common.Metadata](#common.Metadata) | | |
| member_alarms | [EtcdMemberAlarm](#machine.EtcdMemberAlarm) | repeated | |
<a name="machine.EtcdAlarmDisarmResponse"></a>
### EtcdAlarmDisarmResponse
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| messages | [EtcdAlarmDisarm](#machine.EtcdAlarmDisarm) | repeated | |
<a name="machine.EtcdAlarmListResponse"></a>
### EtcdAlarmListResponse
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| messages | [EtcdAlarm](#machine.EtcdAlarm) | repeated | |
<a name="machine.EtcdDefragment"></a>
### EtcdDefragment
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| metadata | [common.Metadata](#common.Metadata) | | |
<a name="machine.EtcdDefragmentResponse"></a>
### EtcdDefragmentResponse
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| messages | [EtcdDefragment](#machine.EtcdDefragment) | repeated | |
<a name="machine.EtcdForfeitLeadership"></a>
### EtcdForfeitLeadership
@ -4209,6 +4312,22 @@ EtcdMember describes a single etcd member.
<a name="machine.EtcdMemberAlarm"></a>
### EtcdMemberAlarm
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| member_id | [uint64](#uint64) | | |
| alarm | [EtcdMemberAlarm.AlarmType](#machine.EtcdMemberAlarm.AlarmType) | | |
<a name="machine.EtcdMemberListRequest"></a>
### EtcdMemberListRequest
@ -4239,6 +4358,30 @@ EtcdMember describes a single etcd member.
<a name="machine.EtcdMemberStatus"></a>
### EtcdMemberStatus
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| member_id | [uint64](#uint64) | | |
| protocol_version | [string](#string) | | |
| db_size | [int64](#int64) | | |
| db_size_in_use | [int64](#int64) | | |
| leader | [uint64](#uint64) | | |
| raft_index | [uint64](#uint64) | | |
| raft_term | [uint64](#uint64) | | |
| raft_applied_index | [uint64](#uint64) | | |
| errors | [string](#string) | repeated | |
| is_learner | [bool](#bool) | | |
<a name="machine.EtcdMembers"></a>
### EtcdMembers
@ -4386,6 +4529,37 @@ EtcdMembers contains the list of members registered on the host.
<a name="machine.EtcdStatus"></a>
### EtcdStatus
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| metadata | [common.Metadata](#common.Metadata) | | |
| member_status | [EtcdMemberStatus](#machine.EtcdMemberStatus) | | |
<a name="machine.EtcdStatusResponse"></a>
### EtcdStatusResponse
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| messages | [EtcdStatus](#machine.EtcdStatus) | repeated | |
<a name="machine.Event"></a>
### Event
@ -5926,6 +6100,19 @@ rpc upgrade
<a name="machine.EtcdMemberAlarm.AlarmType"></a>
### EtcdMemberAlarm.AlarmType
| Name | Number | Description |
| ---- | ------ | ----------- |
| NONE | 0 | |
| NOSPACE | 1 | |
| CORRUPT | 2 | |
<a name="machine.ListRequest.Type"></a>
### ListRequest.Type
@ -6076,6 +6263,20 @@ This API should be used to remove members which don't have an associated Talos n
Snapshot can be later used to recover the cluster via Bootstrap method. |
| EtcdSnapshot | [EtcdSnapshotRequest](#machine.EtcdSnapshotRequest) | [.common.Data](#common.Data) stream | EtcdSnapshot method creates etcd data snapshot (backup) from the local etcd instance and streams it back to the client.
This method is available only on control plane nodes (which run etcd). |
| EtcdAlarmList | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdAlarmListResponse](#machine.EtcdAlarmListResponse) | EtcdAlarmList lists etcd alarms for the current node.
This method is available only on control plane nodes (which run etcd). |
| EtcdAlarmDisarm | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdAlarmDisarmResponse](#machine.EtcdAlarmDisarmResponse) | EtcdAlarmDisarm disarms etcd alarms for the current node.
This method is available only on control plane nodes (which run etcd). |
| EtcdDefragment | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdDefragmentResponse](#machine.EtcdDefragmentResponse) | EtcdDefragment defragments etcd data directory for the current node.
Defragmentation is a resource-heavy operation, so it should only run on a specific node.
This method is available only on control plane nodes (which run etcd). |
| EtcdStatus | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdStatusResponse](#machine.EtcdStatusResponse) | EtcdStatus returns etcd status for the current member.
This method is available only on control plane nodes (which run etcd). |
| GenerateConfiguration | [GenerateConfigurationRequest](#machine.GenerateConfigurationRequest) | [GenerateConfigurationResponse](#machine.GenerateConfigurationResponse) | |
| Hostname | [.google.protobuf.Empty](#google.protobuf.Empty) | [HostnameResponse](#machine.HostnameResponse) | |

View File

@ -901,6 +901,121 @@ talosctl edit <type> [<id>] [flags]
* [talosctl](#talosctl) - A CLI for out-of-band management of Kubernetes nodes created by Talos
## talosctl etcd alarm disarm
Disarm the etcd alarms for the node.
```
talosctl etcd alarm disarm [flags]
```
### Options
```
-h, --help help for disarm
```
### Options inherited from parent commands
```
--cluster string Cluster to connect to if a proxy endpoint is used.
--context string Context to be used in command
-e, --endpoints strings override default endpoints in Talos configuration
-n, --nodes strings target the specified nodes
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
```
### SEE ALSO
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
## talosctl etcd alarm list
List the etcd alarms for the node.
```
talosctl etcd alarm list [flags]
```
### Options
```
-h, --help help for list
```
### Options inherited from parent commands
```
--cluster string Cluster to connect to if a proxy endpoint is used.
--context string Context to be used in command
-e, --endpoints strings override default endpoints in Talos configuration
-n, --nodes strings target the specified nodes
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
```
### SEE ALSO
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
## talosctl etcd alarm
Manage etcd alarms
### Options
```
-h, --help help for alarm
```
### Options inherited from parent commands
```
--cluster string Cluster to connect to if a proxy endpoint is used.
--context string Context to be used in command
-e, --endpoints strings override default endpoints in Talos configuration
-n, --nodes strings target the specified nodes
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
```
### SEE ALSO
* [talosctl etcd](#talosctl-etcd) - Manage etcd
* [talosctl etcd alarm disarm](#talosctl-etcd-alarm-disarm) - Disarm the etcd alarms for the node.
* [talosctl etcd alarm list](#talosctl-etcd-alarm-list) - List the etcd alarms for the node.
## talosctl etcd defrag
Defragment etcd database on the node
### Synopsis
Defragmentation is a maintenance operation that releases unused space from the etcd database file.
Defragmentation is a resource-heavy operation and should be performed only when necessary on a single node at a time.
```
talosctl etcd defrag [flags]
```
### Options
```
-h, --help help for defrag
```
### Options inherited from parent commands
```
--cluster string Cluster to connect to if a proxy endpoint is used.
--context string Context to be used in command
-e, --endpoints strings override default endpoints in Talos configuration
-n, --nodes strings target the specified nodes
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
```
### SEE ALSO
* [talosctl etcd](#talosctl-etcd) - Manage etcd
## talosctl etcd forfeit-leadership
Tell node to forfeit etcd cluster leadership
@ -1048,6 +1163,38 @@ talosctl etcd snapshot <path> [flags]
* [talosctl etcd](#talosctl-etcd) - Manage etcd
## talosctl etcd status
Get the status of etcd cluster member
### Synopsis
Returns the status of the etcd member on the node; use multiple nodes to get the status of all members.
```
talosctl etcd status [flags]
```
### Options
```
-h, --help help for status
```
### Options inherited from parent commands
```
--cluster string Cluster to connect to if a proxy endpoint is used.
--context string Context to be used in command
-e, --endpoints strings override default endpoints in Talos configuration
-n, --nodes strings target the specified nodes
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
```
### SEE ALSO
* [talosctl etcd](#talosctl-etcd) - Manage etcd
## talosctl etcd
Manage etcd
@ -1071,11 +1218,14 @@ Manage etcd
### SEE ALSO
* [talosctl](#talosctl) - A CLI for out-of-band management of Kubernetes nodes created by Talos
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
* [talosctl etcd defrag](#talosctl-etcd-defrag) - Defragment etcd database on the node
* [talosctl etcd forfeit-leadership](#talosctl-etcd-forfeit-leadership) - Tell node to forfeit etcd cluster leadership
* [talosctl etcd leave](#talosctl-etcd-leave) - Tell nodes to leave etcd cluster
* [talosctl etcd members](#talosctl-etcd-members) - Get the list of etcd cluster members
* [talosctl etcd remove-member](#talosctl-etcd-remove-member) - Remove the node from etcd cluster
* [talosctl etcd snapshot](#talosctl-etcd-snapshot) - Stream snapshot of the etcd node to the path.
* [talosctl etcd status](#talosctl-etcd-status) - Get the status of etcd cluster member
## talosctl events