mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-17 18:41:16 +02:00
feat: implement etcd maintenance commands
This allows safely recovering from out-of-space quota issues and performing defragmentation as needed. The `talosctl etcd status` command provides lots of information about the cluster health. See docs for more details. Fixes #4889 Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
80fed31940
commit
96629d5ba6
@ -51,6 +51,25 @@ service MachineService {
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
rpc EtcdSnapshot(EtcdSnapshotRequest) returns (stream common.Data);
|
||||
// EtcdAlarmList lists etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
rpc EtcdAlarmList(google.protobuf.Empty) returns (EtcdAlarmListResponse);
|
||||
// EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
rpc EtcdAlarmDisarm(google.protobuf.Empty) returns (EtcdAlarmDisarmResponse);
|
||||
// EtcdDefragment defragments etcd data directory for the current node.
|
||||
//
|
||||
// Defragmentation is a resource-heavy operation, so it should only run on a specific
|
||||
// node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
rpc EtcdDefragment(google.protobuf.Empty) returns (EtcdDefragmentResponse);
|
||||
// EtcdStatus returns etcd status for the current member.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
rpc EtcdStatus(google.protobuf.Empty) returns (EtcdStatusResponse);
|
||||
rpc GenerateConfiguration(GenerateConfigurationRequest) returns (GenerateConfigurationResponse);
|
||||
rpc Hostname(google.protobuf.Empty) returns (HostnameResponse);
|
||||
rpc Kubeconfig(google.protobuf.Empty) returns (stream common.Data);
|
||||
@ -982,6 +1001,64 @@ message EtcdRecoverResponse {
|
||||
repeated EtcdRecover messages = 1;
|
||||
}
|
||||
|
||||
// EtcdAlarmListResponse is the response of the EtcdAlarmList RPC.
message EtcdAlarmListResponse {
|
||||
// Reply messages, each carrying the alarms listed by one reply.
repeated EtcdAlarm messages = 1;
|
||||
}
|
||||
|
||||
// EtcdAlarm is a single reply of the EtcdAlarmList RPC.
message EtcdAlarm {
|
||||
// Reply metadata; the CLI reads the hostname from it to label rows.
common.Metadata metadata = 1;
|
||||
// Alarms reported by etcd, one entry per (member, alarm) pair.
repeated EtcdMemberAlarm member_alarms = 2;
|
||||
}
|
||||
|
||||
// EtcdMemberAlarm describes one alarm associated with one etcd member.
message EtcdMemberAlarm {
|
||||
// AlarmType is the kind of alarm. The server maps etcd's NONE, NOSPACE and
// CORRUPT alarm types onto these values; any other etcd value becomes NONE.
enum AlarmType {
|
||||
NONE = 0;
|
||||
NOSPACE = 1;
|
||||
CORRUPT = 2;
|
||||
}
|
||||
// ID of the etcd member the alarm belongs to.
uint64 member_id = 1;
|
||||
// Kind of the alarm.
AlarmType alarm = 2;
|
||||
}
|
||||
|
||||
// EtcdAlarmDisarmResponse is the response of the EtcdAlarmDisarm RPC.
message EtcdAlarmDisarmResponse {
|
||||
// Reply messages, each carrying the alarms returned by the disarm call.
repeated EtcdAlarmDisarm messages = 1;
|
||||
}
|
||||
|
||||
// EtcdAlarmDisarm is a single reply of the EtcdAlarmDisarm RPC.
message EtcdAlarmDisarm {
|
||||
// Reply metadata; the CLI reads the hostname from it to label rows.
common.Metadata metadata = 1;
|
||||
// Alarms returned by etcd in response to the disarm request.
repeated EtcdMemberAlarm member_alarms = 2;
|
||||
}
|
||||
|
||||
// EtcdDefragmentResponse is the response of the EtcdDefragment RPC.
message EtcdDefragmentResponse {
|
||||
// Reply messages; they carry no payload besides metadata.
repeated EtcdDefragment messages = 1;
|
||||
}
|
||||
|
||||
// EtcdDefragment is a single reply of the EtcdDefragment RPC.
message EtcdDefragment {
|
||||
// Reply metadata; this message has no other fields.
common.Metadata metadata = 1;
|
||||
}
|
||||
|
||||
// EtcdStatusResponse is the response of the EtcdStatus RPC.
message EtcdStatusResponse {
|
||||
// Reply messages, each carrying the status of one etcd member.
repeated EtcdStatus messages = 1;
|
||||
}
|
||||
|
||||
// EtcdStatus is a single reply of the EtcdStatus RPC.
message EtcdStatus {
|
||||
// Reply metadata; the CLI reads the hostname from it to label rows.
common.Metadata metadata = 1;
|
||||
// Status of the etcd member that was queried.
EtcdMemberStatus member_status = 2;
|
||||
}
|
||||
|
||||
// EtcdMemberStatus mirrors the fields of the etcd status response for one member.
message EtcdMemberStatus {
|
||||
// ID of the member, taken from the etcd response header.
// Note that this field is declared first but uses field number 10.
uint64 member_id = 10;
|
||||
// Version string reported by etcd in the status response.
string protocol_version = 1;
|
||||
// Database size reported by etcd; the CLI renders it as a byte count.
int64 db_size = 2;
|
||||
// In-use part of the database reported by etcd; the CLI renders it as a
// byte count and as a percentage of db_size.
int64 db_size_in_use = 3;
|
||||
// Member ID of the leader as reported by this member.
uint64 leader = 4;
|
||||
// Raft index reported by etcd.
uint64 raft_index = 5;
|
||||
// Raft term reported by etcd.
uint64 raft_term = 6;
|
||||
// Raft applied index reported by etcd.
uint64 raft_applied_index = 7;
|
||||
// Error strings reported by etcd for this member.
repeated string errors = 8;
|
||||
// Whether etcd reports this member as a learner.
bool is_learner = 9;
|
||||
}
|
||||
|
||||
// rpc generateConfiguration
|
||||
|
||||
message RouteConfig {
|
||||
|
@ -14,6 +14,8 @@ import (
|
||||
"sync"
|
||||
"text/tabwriter"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/siderolabs/gen/slices"
|
||||
"github.com/spf13/cobra"
|
||||
snapshot "go.etcd.io/etcd/etcdutl/v3/snapshot"
|
||||
"google.golang.org/grpc/codes"
|
||||
@ -21,6 +23,7 @@ import (
|
||||
"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
|
||||
"github.com/siderolabs/talos/pkg/cli"
|
||||
"github.com/siderolabs/talos/pkg/logging"
|
||||
"github.com/siderolabs/talos/pkg/machinery/api/common"
|
||||
"github.com/siderolabs/talos/pkg/machinery/api/machine"
|
||||
"github.com/siderolabs/talos/pkg/machinery/client"
|
||||
etcdresource "github.com/siderolabs/talos/pkg/machinery/resources/etcd"
|
||||
@ -33,12 +36,127 @@ var etcdCmd = &cobra.Command{
|
||||
Long: ``,
|
||||
}
|
||||
|
||||
// etcdAlarmCmd represents the etcd alarm command.
|
||||
var etcdAlarmCmd = &cobra.Command{
|
||||
Use: "alarm",
|
||||
Short: "Manage etcd alarms",
|
||||
Long: ``,
|
||||
}
|
||||
|
||||
type alarmMessage interface {
|
||||
GetMetadata() *common.Metadata
|
||||
GetMemberAlarms() []*machine.EtcdMemberAlarm
|
||||
}
|
||||
|
||||
func displayAlarms(messages []alarmMessage) error {
|
||||
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
|
||||
node := ""
|
||||
pattern := "%s\t%s\n"
|
||||
header := "MEMBER\tALARM"
|
||||
|
||||
for i, message := range messages {
|
||||
if message.GetMetadata() != nil && message.GetMetadata().GetHostname() != "" {
|
||||
node = message.GetMetadata().GetHostname()
|
||||
}
|
||||
|
||||
for j, alarm := range message.GetMemberAlarms() {
|
||||
if i == 0 && j == 0 {
|
||||
if node != "" {
|
||||
header = "NODE\t" + header
|
||||
pattern = "%s\t" + pattern
|
||||
}
|
||||
|
||||
fmt.Fprintln(w, header)
|
||||
}
|
||||
|
||||
args := []interface{}{
|
||||
etcdresource.FormatMemberID(alarm.GetMemberId()),
|
||||
alarm.GetAlarm().String(),
|
||||
}
|
||||
if node != "" {
|
||||
args = append([]interface{}{node}, args...)
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, pattern, args...)
|
||||
}
|
||||
}
|
||||
|
||||
return w.Flush()
|
||||
}
|
||||
|
||||
// etcdAlarmListCmd represents the etcd alarm list command.
|
||||
var etcdAlarmListCmd = &cobra.Command{
|
||||
Use: "list",
|
||||
Short: "List the etcd alarms for the node.",
|
||||
Long: ``,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
response, err := c.EtcdAlarmList(ctx)
|
||||
if err != nil {
|
||||
if response == nil {
|
||||
return fmt.Errorf("error getting alarms: %w", err)
|
||||
}
|
||||
cli.Warning("%s", err)
|
||||
}
|
||||
|
||||
return displayAlarms(slices.Map(response.Messages, func(v *machine.EtcdAlarm) alarmMessage {
|
||||
return v
|
||||
}))
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
// etcdAlarmDisarmCmd represents the etcd alarm disarm command.
|
||||
var etcdAlarmDisarmCmd = &cobra.Command{
|
||||
Use: "disarm",
|
||||
Short: "Disarm the etcd alarms for the node.",
|
||||
Long: ``,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
response, err := c.EtcdAlarmDisarm(ctx)
|
||||
if err != nil {
|
||||
if response == nil {
|
||||
return fmt.Errorf("error disarming alarms: %w", err)
|
||||
}
|
||||
cli.Warning("%s", err)
|
||||
}
|
||||
|
||||
return displayAlarms(slices.Map(response.Messages, func(v *machine.EtcdAlarmDisarm) alarmMessage {
|
||||
return v
|
||||
}))
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
// etcdDefragCmd represents the etcd defrag command.
|
||||
var etcdDefragCmd = &cobra.Command{
|
||||
Use: "defrag",
|
||||
Short: "Defragment etcd database on the node",
|
||||
Long: `Defragmentation is a maintenance operation that releases unused space from the etcd database file.
|
||||
Defragmentation is a resource heavy operation and should be performed only when necessary on a single node at a time.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
if err := helpers.FailIfMultiNodes(ctx, "etcd defrag"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := c.EtcdDefragment(ctx)
|
||||
|
||||
return err
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
var etcdLeaveCmd = &cobra.Command{
|
||||
Use: "leave",
|
||||
Short: "Tell nodes to leave etcd cluster",
|
||||
Long: ``,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
if err := helpers.FailIfMultiNodes(ctx, "etcd leave"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return c.EtcdLeaveCluster(ctx, &machine.EtcdLeaveClusterRequest{})
|
||||
})
|
||||
},
|
||||
@ -146,6 +264,69 @@ var etcdMemberListCmd = &cobra.Command{
|
||||
},
|
||||
}
|
||||
|
||||
var etcdStatusCmd = &cobra.Command{
|
||||
Use: "status",
|
||||
Short: "Get the status of etcd cluster member",
|
||||
Long: `Returns the status of etcd member on the node, use multiple nodes to get status of all members.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
response, err := c.EtcdStatus(ctx)
|
||||
if err != nil {
|
||||
if response == nil {
|
||||
return fmt.Errorf("error getting status: %w", err)
|
||||
}
|
||||
cli.Warning("%s", err)
|
||||
}
|
||||
|
||||
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
|
||||
node := ""
|
||||
pattern := "%s\t%s\t%s (%.2f%%)\t%s\t%d\t%d\t%d\t%v\t%s\n"
|
||||
header := "MEMBER\tDB SIZE\tIN USE\tLEADER\tRAFT INDEX\tRAFT TERM\tRAFT APPLIED INDEX\tLEARNER\tERRORS"
|
||||
|
||||
for i, message := range response.Messages {
|
||||
if message.Metadata != nil && message.Metadata.Hostname != "" {
|
||||
node = message.Metadata.Hostname
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
if node != "" {
|
||||
header = "NODE\t" + header
|
||||
pattern = "%s\t" + pattern
|
||||
}
|
||||
|
||||
fmt.Fprintln(w, header)
|
||||
}
|
||||
|
||||
var ratio float64
|
||||
|
||||
if message.GetMemberStatus().GetDbSize() > 0 {
|
||||
ratio = float64(message.GetMemberStatus().GetDbSizeInUse()) / float64(message.GetMemberStatus().GetDbSize()) * 100.0
|
||||
}
|
||||
|
||||
args := []interface{}{
|
||||
etcdresource.FormatMemberID(message.GetMemberStatus().GetMemberId()),
|
||||
humanize.Bytes(uint64(message.GetMemberStatus().GetDbSize())),
|
||||
humanize.Bytes(uint64(message.GetMemberStatus().GetDbSizeInUse())),
|
||||
ratio,
|
||||
etcdresource.FormatMemberID(message.GetMemberStatus().GetLeader()),
|
||||
message.GetMemberStatus().GetRaftIndex(),
|
||||
message.GetMemberStatus().GetRaftTerm(),
|
||||
message.GetMemberStatus().GetRaftAppliedIndex(),
|
||||
message.GetMemberStatus().GetIsLearner(),
|
||||
strings.Join(message.GetMemberStatus().GetErrors(), ", "),
|
||||
}
|
||||
if node != "" {
|
||||
args = append([]interface{}{node}, args...)
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, pattern, args...)
|
||||
}
|
||||
|
||||
return w.Flush()
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
var etcdSnapshotCmd = &cobra.Command{
|
||||
Use: "snapshot <path>",
|
||||
Short: "Stream snapshot of the etcd node to the path.",
|
||||
@ -228,6 +409,21 @@ var etcdSnapshotCmd = &cobra.Command{
|
||||
}
|
||||
|
||||
func init() {
|
||||
etcdCmd.AddCommand(etcdLeaveCmd, etcdForfeitLeadershipCmd, etcdMemberListCmd, etcdMemberRemoveCmd, etcdSnapshotCmd)
|
||||
etcdAlarmCmd.AddCommand(
|
||||
etcdAlarmListCmd,
|
||||
etcdAlarmDisarmCmd,
|
||||
)
|
||||
|
||||
etcdCmd.AddCommand(
|
||||
etcdAlarmCmd,
|
||||
etcdDefragCmd,
|
||||
etcdForfeitLeadershipCmd,
|
||||
etcdLeaveCmd,
|
||||
etcdMemberListCmd,
|
||||
etcdMemberRemoveCmd,
|
||||
etcdSnapshotCmd,
|
||||
etcdStatusCmd,
|
||||
)
|
||||
|
||||
addCommand(etcdCmd)
|
||||
}
|
||||
|
@ -24,6 +24,19 @@ preface = """\
|
||||
Talos is built with Go 1.19.4.
|
||||
"""
|
||||
|
||||
[notes.etcd]
|
||||
title = "etcd Maintenance"
|
||||
description="""\
|
||||
Talos adds new APIs to make it easier to perform etcd maintenance operations.
|
||||
|
||||
These APIs are available via new `talosctl etcd` sub-commands:
|
||||
|
||||
* `talosctl etcd alarm list|disarm`
|
||||
* `talosctl etcd defrag`
|
||||
* `talosctl etcd status`
|
||||
|
||||
See also [etcd maintenance guide](https://talos.dev/v1.4/advanced/etcd-maintenance/).
|
||||
"""
|
||||
|
||||
[make_deps]
|
||||
|
||||
|
@ -1952,6 +1952,160 @@ func (s *Server) EtcdRecover(srv machine.MachineService_EtcdRecoverServer) error
|
||||
})
|
||||
}
|
||||
|
||||
func mapAlarms(alarms []*etcdserverpb.AlarmMember) []*machine.EtcdMemberAlarm {
|
||||
mapAlarmType := func(alarmType etcdserverpb.AlarmType) machine.EtcdMemberAlarm_AlarmType {
|
||||
switch alarmType {
|
||||
case etcdserverpb.AlarmType_NOSPACE:
|
||||
return machine.EtcdMemberAlarm_NOSPACE
|
||||
case etcdserverpb.AlarmType_CORRUPT:
|
||||
return machine.EtcdMemberAlarm_CORRUPT
|
||||
case etcdserverpb.AlarmType_NONE:
|
||||
return machine.EtcdMemberAlarm_NONE
|
||||
default:
|
||||
return machine.EtcdMemberAlarm_NONE
|
||||
}
|
||||
}
|
||||
|
||||
return slices.Map(alarms, func(alarm *etcdserverpb.AlarmMember) *machine.EtcdMemberAlarm {
|
||||
return &machine.EtcdMemberAlarm{
|
||||
MemberId: alarm.MemberID,
|
||||
Alarm: mapAlarmType(alarm.Alarm),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// EtcdAlarmList lists etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (s *Server) EtcdAlarmList(ctx context.Context, in *emptypb.Empty) (*machine.EtcdAlarmListResponse, error) {
|
||||
if err := s.checkControlplane("etcd alarm list"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client, err := etcd.NewLocalClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer client.Close()
|
||||
|
||||
resp, err := client.AlarmList(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list etcd alarms: %w", err)
|
||||
}
|
||||
|
||||
return &machine.EtcdAlarmListResponse{
|
||||
Messages: []*machine.EtcdAlarm{
|
||||
{
|
||||
MemberAlarms: mapAlarms(resp.Alarms),
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (s *Server) EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty) (*machine.EtcdAlarmDisarmResponse, error) {
|
||||
if err := s.checkControlplane("etcd alarm list"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client, err := etcd.NewLocalClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer client.Close()
|
||||
|
||||
resp, err := client.AlarmDisarm(ctx, &clientv3.AlarmMember{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to disarm etcd alarm: %w", err)
|
||||
}
|
||||
|
||||
return &machine.EtcdAlarmDisarmResponse{
|
||||
Messages: []*machine.EtcdAlarmDisarm{
|
||||
{
|
||||
MemberAlarms: mapAlarms(resp.Alarms),
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// EtcdDefragment defragments etcd data directory for the current node.
|
||||
//
|
||||
// Defragmentation is a resource-heavy operation, so it should only run on a specific
|
||||
// node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (s *Server) EtcdDefragment(ctx context.Context, in *emptypb.Empty) (*machine.EtcdDefragmentResponse, error) {
|
||||
if err := s.checkControlplane("etcd defragment"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client, err := etcd.NewLocalClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer client.Close()
|
||||
|
||||
_, err = client.Defragment(ctx, nethelpers.JoinHostPort("localhost", constants.EtcdClientPort))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to defragment etcd: %w", err)
|
||||
}
|
||||
|
||||
return &machine.EtcdDefragmentResponse{
|
||||
Messages: []*machine.EtcdDefragment{
|
||||
{},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// EtcdStatus returns etcd status for the member of the cluster.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (s *Server) EtcdStatus(ctx context.Context, in *emptypb.Empty) (*machine.EtcdStatusResponse, error) {
|
||||
if err := s.checkControlplane("etcd status"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client, err := etcd.NewLocalClient()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create etcd client: %w", err)
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer client.Close()
|
||||
|
||||
resp, err := client.Status(ctx, nethelpers.JoinHostPort("localhost", constants.EtcdClientPort))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query etcd status: %w", err)
|
||||
}
|
||||
|
||||
return &machine.EtcdStatusResponse{
|
||||
Messages: []*machine.EtcdStatus{
|
||||
{
|
||||
MemberStatus: &machine.EtcdMemberStatus{
|
||||
MemberId: resp.Header.MemberId,
|
||||
ProtocolVersion: resp.Version,
|
||||
DbSize: resp.DbSize,
|
||||
DbSizeInUse: resp.DbSizeInUse,
|
||||
Leader: resp.Leader,
|
||||
RaftIndex: resp.RaftIndex,
|
||||
RaftTerm: resp.RaftTerm,
|
||||
RaftAppliedIndex: resp.RaftAppliedIndex,
|
||||
Errors: resp.Errors,
|
||||
IsLearner: resp.IsLearner,
|
||||
},
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GenerateClientConfiguration implements the machine.MachineServer interface.
|
||||
func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.GenerateClientConfigurationRequest) (*machine.GenerateClientConfigurationResponse, error) {
|
||||
if s.Controller.Runtime().Config().Machine().Type() == machinetype.TypeWorker {
|
||||
|
@ -40,6 +40,9 @@ var rules = map[string]role.Set{
|
||||
"/machine.MachineService/DiskStats": role.MakeSet(role.Admin, role.Reader),
|
||||
"/machine.MachineService/DiskUsage": role.MakeSet(role.Admin, role.Reader),
|
||||
"/machine.MachineService/Dmesg": role.MakeSet(role.Admin, role.Reader),
|
||||
"/machine.MachineService/EtcdAlarmList": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdAlarmDisarm": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdDefragment": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdForfeitLeadership": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdLeaveCluster": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdMemberList": role.MakeSet(role.Admin, role.Reader),
|
||||
@ -47,6 +50,7 @@ var rules = map[string]role.Set{
|
||||
"/machine.MachineService/EtcdRemoveMember": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdRemoveMemberByID": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/EtcdSnapshot": role.MakeSet(role.Admin, role.EtcdBackup),
|
||||
"/machine.MachineService/EtcdStatus": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/Events": role.MakeSet(role.Admin, role.Reader),
|
||||
"/machine.MachineService/GenerateClientConfiguration": role.MakeSet(role.Admin),
|
||||
"/machine.MachineService/GenerateConfiguration": role.MakeSet(role.Admin),
|
||||
|
@ -50,6 +50,13 @@ func (cliSuite *CLISuite) DiscoverNodeInternalIPs(ctx context.Context) []string
|
||||
return mapNodeInfosToInternalIPs(nodes.Nodes())
|
||||
}
|
||||
|
||||
// DiscoverNodeInternalIPsByType provides list of Talos node internal IPs in the cluster for given machine type.
|
||||
func (cliSuite *CLISuite) DiscoverNodeInternalIPsByType(ctx context.Context, machineType machine.Type) []string {
|
||||
nodesByType := cliSuite.DiscoverNodes(ctx).NodesByType(machineType)
|
||||
|
||||
return mapNodeInfosToInternalIPs(nodesByType)
|
||||
}
|
||||
|
||||
// RandomDiscoveredNodeInternalIP returns the internal IP a random node of the specified type (or any type if no types are specified).
|
||||
//
|
||||
//nolint:dupl
|
||||
|
@ -10,6 +10,7 @@ import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/siderolabs/talos/internal/integration/base"
|
||||
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/machine"
|
||||
@ -30,6 +31,21 @@ func (suite *EtcdSuite) TestMembers() {
|
||||
suite.RunCLI([]string{"etcd", "members", "--nodes", suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)}) // default checks for stdout not empty
|
||||
}
|
||||
|
||||
// TestStatus etcd status should have some output.
|
||||
func (suite *EtcdSuite) TestStatus() {
|
||||
cpNodes := suite.DiscoverNodeInternalIPsByType(context.TODO(), machine.TypeControlPlane)
|
||||
|
||||
suite.RunCLI([]string{"etcd", "status", "--nodes", strings.Join(cpNodes, ",")}) // default checks for stdout not empty
|
||||
}
|
||||
|
||||
// TestAlarm etcd alarm should have no output.
|
||||
func (suite *EtcdSuite) TestAlarm() {
|
||||
cpNode := suite.RandomDiscoveredNodeInternalIP(machine.TypeControlPlane)
|
||||
|
||||
suite.RunCLI([]string{"etcd", "alarm", "list", "--nodes", cpNode}, base.StdoutEmpty())
|
||||
suite.RunCLI([]string{"etcd", "alarm", "disarm", "--nodes", cpNode}, base.StdoutEmpty())
|
||||
}
|
||||
|
||||
// TestForfeitLeadership etcd forfeit-leadership check.
|
||||
func (suite *EtcdSuite) TestForfeitLeadership() {
|
||||
nodes := suite.DiscoverNodes(context.TODO()).NodesByType(machine.TypeControlPlane)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -63,6 +63,25 @@ type MachineServiceClient interface {
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdSnapshot(ctx context.Context, in *EtcdSnapshotRequest, opts ...grpc.CallOption) (MachineService_EtcdSnapshotClient, error)
|
||||
// EtcdAlarmList lists etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdAlarmList(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmListResponse, error)
|
||||
// EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmDisarmResponse, error)
|
||||
// EtcdDefragment defragments etcd data directory for the current node.
|
||||
//
|
||||
// Defragmentation is a resource-heavy operation, so it should only run on a specific
|
||||
// node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdDefragment(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdDefragmentResponse, error)
|
||||
// EtcdStatus returns etcd status for the current member.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdStatus(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdStatusResponse, error)
|
||||
GenerateConfiguration(ctx context.Context, in *GenerateConfigurationRequest, opts ...grpc.CallOption) (*GenerateConfigurationResponse, error)
|
||||
Hostname(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*HostnameResponse, error)
|
||||
Kubeconfig(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (MachineService_KubeconfigClient, error)
|
||||
@ -355,6 +374,42 @@ func (x *machineServiceEtcdSnapshotClient) Recv() (*common.Data, error) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (c *machineServiceClient) EtcdAlarmList(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmListResponse, error) {
|
||||
out := new(EtcdAlarmListResponse)
|
||||
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdAlarmList", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *machineServiceClient) EtcdAlarmDisarm(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdAlarmDisarmResponse, error) {
|
||||
out := new(EtcdAlarmDisarmResponse)
|
||||
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdAlarmDisarm", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *machineServiceClient) EtcdDefragment(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdDefragmentResponse, error) {
|
||||
out := new(EtcdDefragmentResponse)
|
||||
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdDefragment", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *machineServiceClient) EtcdStatus(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*EtcdStatusResponse, error) {
|
||||
out := new(EtcdStatusResponse)
|
||||
err := c.cc.Invoke(ctx, "/machine.MachineService/EtcdStatus", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *machineServiceClient) GenerateConfiguration(ctx context.Context, in *GenerateConfigurationRequest, opts ...grpc.CallOption) (*GenerateConfigurationResponse, error) {
|
||||
out := new(GenerateConfigurationResponse)
|
||||
err := c.cc.Invoke(ctx, "/machine.MachineService/GenerateConfiguration", in, out, opts...)
|
||||
@ -777,6 +832,25 @@ type MachineServiceServer interface {
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdSnapshot(*EtcdSnapshotRequest, MachineService_EtcdSnapshotServer) error
|
||||
// EtcdAlarmList lists etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdAlarmList(context.Context, *emptypb.Empty) (*EtcdAlarmListResponse, error)
|
||||
// EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdAlarmDisarm(context.Context, *emptypb.Empty) (*EtcdAlarmDisarmResponse, error)
|
||||
// EtcdDefragment defragments etcd data directory for the current node.
|
||||
//
|
||||
// Defragmentation is a resource-heavy operation, so it should only run on a specific
|
||||
// node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdDefragment(context.Context, *emptypb.Empty) (*EtcdDefragmentResponse, error)
|
||||
// EtcdStatus returns etcd status for the current member.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
EtcdStatus(context.Context, *emptypb.Empty) (*EtcdStatusResponse, error)
|
||||
GenerateConfiguration(context.Context, *GenerateConfigurationRequest) (*GenerateConfigurationResponse, error)
|
||||
Hostname(context.Context, *emptypb.Empty) (*HostnameResponse, error)
|
||||
Kubeconfig(*emptypb.Empty, MachineService_KubeconfigServer) error
|
||||
@ -858,6 +932,18 @@ func (UnimplementedMachineServiceServer) EtcdRecover(MachineService_EtcdRecoverS
|
||||
func (UnimplementedMachineServiceServer) EtcdSnapshot(*EtcdSnapshotRequest, MachineService_EtcdSnapshotServer) error {
|
||||
return status.Errorf(codes.Unimplemented, "method EtcdSnapshot not implemented")
|
||||
}
|
||||
func (UnimplementedMachineServiceServer) EtcdAlarmList(context.Context, *emptypb.Empty) (*EtcdAlarmListResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method EtcdAlarmList not implemented")
|
||||
}
|
||||
func (UnimplementedMachineServiceServer) EtcdAlarmDisarm(context.Context, *emptypb.Empty) (*EtcdAlarmDisarmResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method EtcdAlarmDisarm not implemented")
|
||||
}
|
||||
func (UnimplementedMachineServiceServer) EtcdDefragment(context.Context, *emptypb.Empty) (*EtcdDefragmentResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method EtcdDefragment not implemented")
|
||||
}
|
||||
func (UnimplementedMachineServiceServer) EtcdStatus(context.Context, *emptypb.Empty) (*EtcdStatusResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method EtcdStatus not implemented")
|
||||
}
|
||||
func (UnimplementedMachineServiceServer) GenerateConfiguration(context.Context, *GenerateConfigurationRequest) (*GenerateConfigurationResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GenerateConfiguration not implemented")
|
||||
}
|
||||
@ -1242,6 +1328,78 @@ func (x *machineServiceEtcdSnapshotServer) Send(m *common.Data) error {
|
||||
return x.ServerStream.SendMsg(m)
|
||||
}
|
||||
|
||||
func _MachineService_EtcdAlarmList_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(emptypb.Empty)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(MachineServiceServer).EtcdAlarmList(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/machine.MachineService/EtcdAlarmList",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(MachineServiceServer).EtcdAlarmList(ctx, req.(*emptypb.Empty))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _MachineService_EtcdAlarmDisarm_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(emptypb.Empty)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(MachineServiceServer).EtcdAlarmDisarm(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/machine.MachineService/EtcdAlarmDisarm",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(MachineServiceServer).EtcdAlarmDisarm(ctx, req.(*emptypb.Empty))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _MachineService_EtcdDefragment_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(emptypb.Empty)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(MachineServiceServer).EtcdDefragment(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/machine.MachineService/EtcdDefragment",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(MachineServiceServer).EtcdDefragment(ctx, req.(*emptypb.Empty))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _MachineService_EtcdStatus_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(emptypb.Empty)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(MachineServiceServer).EtcdStatus(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/machine.MachineService/EtcdStatus",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(MachineServiceServer).EtcdStatus(ctx, req.(*emptypb.Empty))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _MachineService_GenerateConfiguration_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(GenerateConfigurationRequest)
|
||||
if err := dec(in); err != nil {
|
||||
@ -1793,6 +1951,22 @@ var MachineService_ServiceDesc = grpc.ServiceDesc{
|
||||
MethodName: "EtcdForfeitLeadership",
|
||||
Handler: _MachineService_EtcdForfeitLeadership_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "EtcdAlarmList",
|
||||
Handler: _MachineService_EtcdAlarmList_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "EtcdAlarmDisarm",
|
||||
Handler: _MachineService_EtcdAlarmDisarm_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "EtcdDefragment",
|
||||
Handler: _MachineService_EtcdDefragment_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "EtcdStatus",
|
||||
Handler: _MachineService_EtcdStatus_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "GenerateConfiguration",
|
||||
Handler: _MachineService_GenerateConfiguration_Handler,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -839,6 +839,61 @@ func (c *Client) EtcdRecover(ctx context.Context, snapshot io.Reader, callOption
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// EtcdAlarmList lists etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (c *Client) EtcdAlarmList(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdAlarmListResponse, error) {
|
||||
resp, err := c.MachineClient.EtcdAlarmList(ctx, &emptypb.Empty{}, opts...)
|
||||
|
||||
var filtered interface{}
|
||||
filtered, err = FilterMessages(resp, err)
|
||||
resp, _ = filtered.(*machineapi.EtcdAlarmListResponse) //nolint:errcheck
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (c *Client) EtcdAlarmDisarm(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdAlarmDisarmResponse, error) {
|
||||
resp, err := c.MachineClient.EtcdAlarmDisarm(ctx, &emptypb.Empty{}, opts...)
|
||||
|
||||
var filtered interface{}
|
||||
filtered, err = FilterMessages(resp, err)
|
||||
resp, _ = filtered.(*machineapi.EtcdAlarmDisarmResponse) //nolint:errcheck
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// EtcdDefragment defragments etcd data directory for the current node.
|
||||
//
|
||||
// Defragmentation is a resource-heavy operation, so it should only run on a specific
|
||||
// node.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (c *Client) EtcdDefragment(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdDefragmentResponse, error) {
|
||||
resp, err := c.MachineClient.EtcdDefragment(ctx, &emptypb.Empty{}, opts...)
|
||||
|
||||
var filtered interface{}
|
||||
filtered, err = FilterMessages(resp, err)
|
||||
resp, _ = filtered.(*machineapi.EtcdDefragmentResponse) //nolint:errcheck
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// EtcdStatus returns etcd status for the current member.
|
||||
//
|
||||
// This method is available only on control plane nodes (which run etcd).
|
||||
func (c *Client) EtcdStatus(ctx context.Context, opts ...grpc.CallOption) (*machineapi.EtcdStatusResponse, error) {
|
||||
resp, err := c.MachineClient.EtcdStatus(ctx, &emptypb.Empty{}, opts...)
|
||||
|
||||
var filtered interface{}
|
||||
filtered, err = FilterMessages(resp, err)
|
||||
resp, _ = filtered.(*machineapi.EtcdStatusResponse) //nolint:errcheck
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// GenerateClientConfiguration implements proto.MachineServiceClient interface.
|
||||
func (c *Client) GenerateClientConfiguration(ctx context.Context, req *machineapi.GenerateClientConfigurationRequest, callOptions ...grpc.CallOption) (resp *machineapi.GenerateClientConfigurationResponse, err error) { //nolint:lll
|
||||
resp, err = c.MachineClient.GenerateClientConfiguration(ctx, req, callOptions...)
|
||||
|
76
website/content/v1.4/advanced/etcd-maintenance.md
Normal file
76
website/content/v1.4/advanced/etcd-maintenance.md
Normal file
@ -0,0 +1,76 @@
|
||||
---
|
||||
title: "etcd Maintenance"
|
||||
description: "Operational instructions for etcd database."
|
||||
---
|
||||
|
||||
`etcd` database backs Kubernetes control plane state, so `etcd` health is critical for Kubernetes availability.
|
||||
|
||||
## Space Quota
|
||||
|
||||
`etcd` database space quota is set to 2 GiB by default.
|
||||
If the database size exceeds the quota, `etcd` will stop operations until the issue is resolved.
|
||||
|
||||
This condition can be checked with `talosctl etcd alarm list` command:
|
||||
|
||||
```bash
|
||||
$ talosctl -n <IP> etcd alarm list
|
||||
NODE MEMBER ALARM
|
||||
172.20.0.2 a49c021e76e707db NOSPACE
|
||||
```
|
||||
|
||||
If the Kubernetes database contains lots of resources, space quota can be increased to match the actual usage.
|
||||
The recommended maximum size is 8 GiB.
|
||||
|
||||
To increase the space quota, edit the `etcd` section in the machine configuration:
|
||||
|
||||
```yaml
|
||||
machine:
|
||||
etcd:
|
||||
extraArgs:
|
||||
quota-backend-bytes: 4294967296 # 4 GiB
|
||||
```
|
||||
|
||||
Once the node is rebooted with the new configuration, use `talosctl etcd alarm disarm` to clear the `NOSPACE` alarm.
|
||||
|
||||
## Defragmentation
|
||||
|
||||
`etcd` database can become fragmented over time if there are lots of writes and deletes.
|
||||
Kubernetes API server performs automatic compaction of the `etcd` database, which marks deleted space as free and ready to be reused.
|
||||
However, the space is not actually freed until the database is defragmented.
|
||||
|
||||
If the database is heavily fragmented (in use/db size ratio is less than 0.5), defragmentation might improve performance.
|
||||
If the database runs over the space quota (see above), but the actual in use database size is small, defragmentation is required to bring the on-disk database size below the limit.
|
||||
|
||||
Current database size can be checked with `talosctl etcd status` command:
|
||||
|
||||
```bash
|
||||
$ talosctl -n <CP1>,<CP2>,<CP3> etcd status
|
||||
NODE MEMBER DB SIZE IN USE LEADER RAFT INDEX RAFT TERM RAFT APPLIED INDEX LEARNER ERRORS
|
||||
172.20.0.3 ecebb05b59a776f1 21 MB 6.0 MB (29.08%) ecebb05b59a776f1 53391 4 53391 false
|
||||
172.20.0.2 a49c021e76e707db 17 MB 4.5 MB (26.10%) ecebb05b59a776f1 53391 4 53391 false
|
||||
172.20.0.4 eb47fb33e59bf0e2 20 MB 5.9 MB (28.96%) ecebb05b59a776f1 53391 4 53391 false
|
||||
```
|
||||
|
||||
If any of the nodes are over database size quota, alarms will be printed in the `ERRORS` column.
|
||||
|
||||
To defragment the database, run `talosctl etcd defrag` command:
|
||||
|
||||
```bash
|
||||
talosctl -n <CP1> etcd defrag
|
||||
```
|
||||
|
||||
> Note: defragmentation is a resource-intensive operation, so it is recommended to run it on a single node at a time.
|
||||
> Defragmentation of a live member blocks the system from reading and writing data while rebuilding its state.
|
||||
|
||||
Once the defragmentation is complete, the database size will closely match the in use size:
|
||||
|
||||
```bash
|
||||
$ talosctl -n <CP1> etcd status
|
||||
NODE MEMBER DB SIZE IN USE LEADER RAFT INDEX RAFT TERM RAFT APPLIED INDEX LEARNER ERRORS
|
||||
172.20.0.2 a49c021e76e707db 4.5 MB 4.5 MB (100.00%) ecebb05b59a776f1 56065 4 56065 false
|
||||
```
|
||||
|
||||
## Snapshotting
|
||||
|
||||
Regular backups of `etcd` database should be performed to ensure that the cluster can be restored in case of a failure.
|
||||
This procedure is described in the [disaster recovery]({{< relref "disaster-recovery" >}}) guide.
|
@ -235,6 +235,12 @@ description: Talos gRPC API reference.
|
||||
- [DiskUsageInfo](#machine.DiskUsageInfo)
|
||||
- [DiskUsageRequest](#machine.DiskUsageRequest)
|
||||
- [DmesgRequest](#machine.DmesgRequest)
|
||||
- [EtcdAlarm](#machine.EtcdAlarm)
|
||||
- [EtcdAlarmDisarm](#machine.EtcdAlarmDisarm)
|
||||
- [EtcdAlarmDisarmResponse](#machine.EtcdAlarmDisarmResponse)
|
||||
- [EtcdAlarmListResponse](#machine.EtcdAlarmListResponse)
|
||||
- [EtcdDefragment](#machine.EtcdDefragment)
|
||||
- [EtcdDefragmentResponse](#machine.EtcdDefragmentResponse)
|
||||
- [EtcdForfeitLeadership](#machine.EtcdForfeitLeadership)
|
||||
- [EtcdForfeitLeadershipRequest](#machine.EtcdForfeitLeadershipRequest)
|
||||
- [EtcdForfeitLeadershipResponse](#machine.EtcdForfeitLeadershipResponse)
|
||||
@ -242,8 +248,10 @@ description: Talos gRPC API reference.
|
||||
- [EtcdLeaveClusterRequest](#machine.EtcdLeaveClusterRequest)
|
||||
- [EtcdLeaveClusterResponse](#machine.EtcdLeaveClusterResponse)
|
||||
- [EtcdMember](#machine.EtcdMember)
|
||||
- [EtcdMemberAlarm](#machine.EtcdMemberAlarm)
|
||||
- [EtcdMemberListRequest](#machine.EtcdMemberListRequest)
|
||||
- [EtcdMemberListResponse](#machine.EtcdMemberListResponse)
|
||||
- [EtcdMemberStatus](#machine.EtcdMemberStatus)
|
||||
- [EtcdMembers](#machine.EtcdMembers)
|
||||
- [EtcdRecover](#machine.EtcdRecover)
|
||||
- [EtcdRecoverResponse](#machine.EtcdRecoverResponse)
|
||||
@ -254,6 +262,8 @@ description: Talos gRPC API reference.
|
||||
- [EtcdRemoveMemberRequest](#machine.EtcdRemoveMemberRequest)
|
||||
- [EtcdRemoveMemberResponse](#machine.EtcdRemoveMemberResponse)
|
||||
- [EtcdSnapshotRequest](#machine.EtcdSnapshotRequest)
|
||||
- [EtcdStatus](#machine.EtcdStatus)
|
||||
- [EtcdStatusResponse](#machine.EtcdStatusResponse)
|
||||
- [Event](#machine.Event)
|
||||
- [EventsRequest](#machine.EventsRequest)
|
||||
- [FeaturesInfo](#machine.FeaturesInfo)
|
||||
@ -344,6 +354,7 @@ description: Talos gRPC API reference.
|
||||
- [VersionResponse](#machine.VersionResponse)
|
||||
|
||||
- [ApplyConfigurationRequest.Mode](#machine.ApplyConfigurationRequest.Mode)
|
||||
- [EtcdMemberAlarm.AlarmType](#machine.EtcdMemberAlarm.AlarmType)
|
||||
- [ListRequest.Type](#machine.ListRequest.Type)
|
||||
- [MachineConfig.MachineType](#machine.MachineConfig.MachineType)
|
||||
- [MachineStatusEvent.MachineStage](#machine.MachineStatusEvent.MachineStage)
|
||||
@ -4109,6 +4120,98 @@ dmesg
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdAlarm"></a>
|
||||
|
||||
### EtcdAlarm
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| metadata | [common.Metadata](#common.Metadata) | | |
|
||||
| member_alarms | [EtcdMemberAlarm](#machine.EtcdMemberAlarm) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdAlarmDisarm"></a>
|
||||
|
||||
### EtcdAlarmDisarm
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| metadata | [common.Metadata](#common.Metadata) | | |
|
||||
| member_alarms | [EtcdMemberAlarm](#machine.EtcdMemberAlarm) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdAlarmDisarmResponse"></a>
|
||||
|
||||
### EtcdAlarmDisarmResponse
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| messages | [EtcdAlarmDisarm](#machine.EtcdAlarmDisarm) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdAlarmListResponse"></a>
|
||||
|
||||
### EtcdAlarmListResponse
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| messages | [EtcdAlarm](#machine.EtcdAlarm) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdDefragment"></a>
|
||||
|
||||
### EtcdDefragment
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| metadata | [common.Metadata](#common.Metadata) | | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdDefragmentResponse"></a>
|
||||
|
||||
### EtcdDefragmentResponse
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| messages | [EtcdDefragment](#machine.EtcdDefragment) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdForfeitLeadership"></a>
|
||||
|
||||
### EtcdForfeitLeadership
|
||||
@ -4209,6 +4312,22 @@ EtcdMember describes a single etcd member.
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdMemberAlarm"></a>
|
||||
|
||||
### EtcdMemberAlarm
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| member_id | [uint64](#uint64) | | |
|
||||
| alarm | [EtcdMemberAlarm.AlarmType](#machine.EtcdMemberAlarm.AlarmType) | | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdMemberListRequest"></a>
|
||||
|
||||
### EtcdMemberListRequest
|
||||
@ -4239,6 +4358,30 @@ EtcdMember describes a single etcd member.
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdMemberStatus"></a>
|
||||
|
||||
### EtcdMemberStatus
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| member_id | [uint64](#uint64) | | |
|
||||
| protocol_version | [string](#string) | | |
|
||||
| db_size | [int64](#int64) | | |
|
||||
| db_size_in_use | [int64](#int64) | | |
|
||||
| leader | [uint64](#uint64) | | |
|
||||
| raft_index | [uint64](#uint64) | | |
|
||||
| raft_term | [uint64](#uint64) | | |
|
||||
| raft_applied_index | [uint64](#uint64) | | |
|
||||
| errors | [string](#string) | repeated | |
|
||||
| is_learner | [bool](#bool) | | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdMembers"></a>
|
||||
|
||||
### EtcdMembers
|
||||
@ -4386,6 +4529,37 @@ EtcdMembers contains the list of members registered on the host.
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdStatus"></a>
|
||||
|
||||
### EtcdStatus
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| metadata | [common.Metadata](#common.Metadata) | | |
|
||||
| member_status | [EtcdMemberStatus](#machine.EtcdMemberStatus) | | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdStatusResponse"></a>
|
||||
|
||||
### EtcdStatusResponse
|
||||
|
||||
|
||||
|
||||
| Field | Type | Label | Description |
|
||||
| ----- | ---- | ----- | ----------- |
|
||||
| messages | [EtcdStatus](#machine.EtcdStatus) | repeated | |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<a name="machine.Event"></a>
|
||||
|
||||
### Event
|
||||
@ -5926,6 +6100,19 @@ rpc upgrade
|
||||
|
||||
|
||||
|
||||
<a name="machine.EtcdMemberAlarm.AlarmType"></a>
|
||||
|
||||
### EtcdMemberAlarm.AlarmType
|
||||
|
||||
|
||||
| Name | Number | Description |
|
||||
| ---- | ------ | ----------- |
|
||||
| NONE | 0 | |
|
||||
| NOSPACE | 1 | |
|
||||
| CORRUPT | 2 | |
|
||||
|
||||
|
||||
|
||||
<a name="machine.ListRequest.Type"></a>
|
||||
|
||||
### ListRequest.Type
|
||||
@ -6076,6 +6263,20 @@ This API should be used to remove members which don't have an associated Talos n
|
||||
Snapshot can be later used to recover the cluster via Bootstrap method. |
|
||||
| EtcdSnapshot | [EtcdSnapshotRequest](#machine.EtcdSnapshotRequest) | [.common.Data](#common.Data) stream | EtcdSnapshot method creates etcd data snapshot (backup) from the local etcd instance and streams it back to the client.
|
||||
|
||||
This method is available only on control plane nodes (which run etcd). |
|
||||
| EtcdAlarmList | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdAlarmListResponse](#machine.EtcdAlarmListResponse) | EtcdAlarmList lists etcd alarms for the current node.
|
||||
|
||||
This method is available only on control plane nodes (which run etcd). |
|
||||
| EtcdAlarmDisarm | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdAlarmDisarmResponse](#machine.EtcdAlarmDisarmResponse) | EtcdAlarmDisarm disarms etcd alarms for the current node.
|
||||
|
||||
This method is available only on control plane nodes (which run etcd). |
|
||||
| EtcdDefragment | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdDefragmentResponse](#machine.EtcdDefragmentResponse) | EtcdDefragment defragments etcd data directory for the current node.
|
||||
|
||||
Defragmentation is a resource-heavy operation, so it should only run on a specific node.
|
||||
|
||||
This method is available only on control plane nodes (which run etcd). |
|
||||
| EtcdStatus | [.google.protobuf.Empty](#google.protobuf.Empty) | [EtcdStatusResponse](#machine.EtcdStatusResponse) | EtcdStatus returns etcd status for the current member.
|
||||
|
||||
This method is available only on control plane nodes (which run etcd). |
|
||||
| GenerateConfiguration | [GenerateConfigurationRequest](#machine.GenerateConfigurationRequest) | [GenerateConfigurationResponse](#machine.GenerateConfigurationResponse) | |
|
||||
| Hostname | [.google.protobuf.Empty](#google.protobuf.Empty) | [HostnameResponse](#machine.HostnameResponse) | |
|
||||
|
@ -901,6 +901,121 @@ talosctl edit <type> [<id>] [flags]
|
||||
|
||||
* [talosctl](#talosctl) - A CLI for out-of-band management of Kubernetes nodes created by Talos
|
||||
|
||||
## talosctl etcd alarm disarm
|
||||
|
||||
Disarm the etcd alarms for the node.
|
||||
|
||||
```
|
||||
talosctl etcd alarm disarm [flags]
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
```
|
||||
-h, --help help for disarm
|
||||
```
|
||||
|
||||
### Options inherited from parent commands
|
||||
|
||||
```
|
||||
--cluster string Cluster to connect to if a proxy endpoint is used.
|
||||
--context string Context to be used in command
|
||||
-e, --endpoints strings override default endpoints in Talos configuration
|
||||
-n, --nodes strings target the specified nodes
|
||||
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
|
||||
```
|
||||
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
|
||||
|
||||
## talosctl etcd alarm list
|
||||
|
||||
List the etcd alarms for the node.
|
||||
|
||||
```
|
||||
talosctl etcd alarm list [flags]
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
```
|
||||
-h, --help help for list
|
||||
```
|
||||
|
||||
### Options inherited from parent commands
|
||||
|
||||
```
|
||||
--cluster string Cluster to connect to if a proxy endpoint is used.
|
||||
--context string Context to be used in command
|
||||
-e, --endpoints strings override default endpoints in Talos configuration
|
||||
-n, --nodes strings target the specified nodes
|
||||
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
|
||||
```
|
||||
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
|
||||
|
||||
## talosctl etcd alarm
|
||||
|
||||
Manage etcd alarms
|
||||
|
||||
### Options
|
||||
|
||||
```
|
||||
-h, --help help for alarm
|
||||
```
|
||||
|
||||
### Options inherited from parent commands
|
||||
|
||||
```
|
||||
--cluster string Cluster to connect to if a proxy endpoint is used.
|
||||
--context string Context to be used in command
|
||||
-e, --endpoints strings override default endpoints in Talos configuration
|
||||
-n, --nodes strings target the specified nodes
|
||||
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
|
||||
```
|
||||
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl etcd](#talosctl-etcd) - Manage etcd
|
||||
* [talosctl etcd alarm disarm](#talosctl-etcd-alarm-disarm) - Disarm the etcd alarms for the node.
|
||||
* [talosctl etcd alarm list](#talosctl-etcd-alarm-list) - List the etcd alarms for the node.
|
||||
|
||||
## talosctl etcd defrag
|
||||
|
||||
Defragment etcd database on the node
|
||||
|
||||
### Synopsis
|
||||
|
||||
Defragmentation is a maintenance operation that releases unused space from the etcd database file.
|
||||
Defragmentation is a resource heavy operation and should be performed only when necessary on a single node at a time.
|
||||
|
||||
```
|
||||
talosctl etcd defrag [flags]
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
```
|
||||
-h, --help help for defrag
|
||||
```
|
||||
|
||||
### Options inherited from parent commands
|
||||
|
||||
```
|
||||
--cluster string Cluster to connect to if a proxy endpoint is used.
|
||||
--context string Context to be used in command
|
||||
-e, --endpoints strings override default endpoints in Talos configuration
|
||||
-n, --nodes strings target the specified nodes
|
||||
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
|
||||
```
|
||||
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl etcd](#talosctl-etcd) - Manage etcd
|
||||
|
||||
## talosctl etcd forfeit-leadership
|
||||
|
||||
Tell node to forfeit etcd cluster leadership
|
||||
@ -1048,6 +1163,38 @@ talosctl etcd snapshot <path> [flags]
|
||||
|
||||
* [talosctl etcd](#talosctl-etcd) - Manage etcd
|
||||
|
||||
## talosctl etcd status
|
||||
|
||||
Get the status of etcd cluster member
|
||||
|
||||
### Synopsis
|
||||
|
||||
Returns the status of etcd member on the node, use multiple nodes to get status of all members.
|
||||
|
||||
```
|
||||
talosctl etcd status [flags]
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
```
|
||||
-h, --help help for status
|
||||
```
|
||||
|
||||
### Options inherited from parent commands
|
||||
|
||||
```
|
||||
--cluster string Cluster to connect to if a proxy endpoint is used.
|
||||
--context string Context to be used in command
|
||||
-e, --endpoints strings override default endpoints in Talos configuration
|
||||
-n, --nodes strings target the specified nodes
|
||||
--talosconfig string The path to the Talos configuration file. Defaults to 'TALOSCONFIG' env variable if set, otherwise '$HOME/.talos/config' and '/var/run/secrets/talos.dev/config' in order.
|
||||
```
|
||||
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl etcd](#talosctl-etcd) - Manage etcd
|
||||
|
||||
## talosctl etcd
|
||||
|
||||
Manage etcd
|
||||
@ -1071,11 +1218,14 @@ Manage etcd
|
||||
### SEE ALSO
|
||||
|
||||
* [talosctl](#talosctl) - A CLI for out-of-band management of Kubernetes nodes created by Talos
|
||||
* [talosctl etcd alarm](#talosctl-etcd-alarm) - Manage etcd alarms
|
||||
* [talosctl etcd defrag](#talosctl-etcd-defrag) - Defragment etcd database on the node
|
||||
* [talosctl etcd forfeit-leadership](#talosctl-etcd-forfeit-leadership) - Tell node to forfeit etcd cluster leadership
|
||||
* [talosctl etcd leave](#talosctl-etcd-leave) - Tell nodes to leave etcd cluster
|
||||
* [talosctl etcd members](#talosctl-etcd-members) - Get the list of etcd cluster members
|
||||
* [talosctl etcd remove-member](#talosctl-etcd-remove-member) - Remove the node from etcd cluster
|
||||
* [talosctl etcd snapshot](#talosctl-etcd-snapshot) - Stream snapshot of the etcd node to the path.
|
||||
* [talosctl etcd status](#talosctl-etcd-status) - Get the status of etcd cluster member
|
||||
|
||||
## talosctl events
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user