feat: add new etcd members in learner mode

Fixes #3714

This provides a safer way to join new members to the etcd cluster.

See https://etcd.io/docs/v3.4/learning/design-learner/

With learner mode join there are a few differences:

* new nodes are joined one by one, because etcd allows only a single
learner member in the cluster at a time
* learner members are not counted in quorum calculations, so while
the learner catches up with the leader, quorum is not affected and
the cluster remains operational

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-06-17 17:03:38 +03:00 committed by Andrey Smirnov
parent b1c66fbad1
commit eefe1c21c3
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
8 changed files with 123 additions and 19 deletions

View File

@ -791,6 +791,8 @@ message EtcdMember {
repeated string peer_urls = 4; repeated string peer_urls = 4;
// the list of URLs the member exposes to the cluster for communication. // the list of URLs the member exposes to the cluster for communication.
repeated string client_urls = 5; repeated string client_urls = 5;
// learner flag
bool is_learner = 6;
} }
// EtcdMembers contains the list of members registered on the host. // EtcdMembers contains the list of members registered on the host.

View File

@ -89,7 +89,7 @@ var etcdMemberListCmd = &cobra.Command{
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
node := "" node := ""
pattern := "%s\t%s\t%s\t%s\n" pattern := "%s\t%s\t%s\t%s\t%v\n"
for i, message := range response.Messages { for i, message := range response.Messages {
if message.Metadata != nil && message.Metadata.Hostname != "" { if message.Metadata != nil && message.Metadata.Hostname != "" {
@ -103,10 +103,10 @@ var etcdMemberListCmd = &cobra.Command{
for j, member := range message.Members { for j, member := range message.Members {
if i == 0 && j == 0 { if i == 0 && j == 0 {
if node != "" { if node != "" {
fmt.Fprintln(w, "NODE\tID\tHOSTNAME\tPEER URLS\tCLIENT URLS") fmt.Fprintln(w, "NODE\tID\tHOSTNAME\tPEER URLS\tCLIENT URLS\tLEARNER")
pattern = "%s\t" + pattern pattern = "%s\t" + pattern
} else { } else {
fmt.Fprintln(w, "ID\tHOSTNAME\tPEER URLS\tCLIENT URLS") fmt.Fprintln(w, "ID\tHOSTNAME\tPEER URLS\tCLIENT URLS\tLEARNER")
} }
} }
@ -115,6 +115,7 @@ var etcdMemberListCmd = &cobra.Command{
member.Hostname, member.Hostname,
strings.Join(member.PeerUrls, ","), strings.Join(member.PeerUrls, ","),
strings.Join(member.ClientUrls, ","), strings.Join(member.ClientUrls, ","),
member.IsLearner,
} }
if node != "" { if node != "" {
args = append([]interface{}{node}, args...) args = append([]interface{}{node}, args...)

View File

@ -61,6 +61,13 @@ the default values overwritten by Talos.
* runc: 1.0.1 * runc: 1.0.1
* GRUB: 2.06 * GRUB: 2.06
* Talos is built with Go 1.16.6 * Talos is built with Go 1.16.6
"""
[notes.etcd]
title = "etcd"
description = """\
New etcd cluster members are now joined in [learner mode](https://etcd.io/docs/v3.4/learning/design-learner/), which improves cluster resiliency
to member join issues.
""" """
[notes.capi] [notes.capi]

View File

@ -1712,6 +1712,7 @@ func (s *Server) EtcdMemberList(ctx context.Context, in *machine.EtcdMemberListR
Hostname: member.GetName(), Hostname: member.GetName(),
PeerUrls: member.GetPeerURLs(), PeerUrls: member.GetPeerURLs(),
ClientUrls: member.GetClientURLs(), ClientUrls: member.GetClientURLs(),
IsLearner: member.GetIsLearner(),
}, },
) )

View File

@ -58,6 +58,11 @@ type Etcd struct {
args []string args []string
client *etcd.Client client *etcd.Client
// if the new member was added as a learner during the service start, its ID is kept here
learnerMemberID uint64
promoteCtxCancel context.CancelFunc
} }
// ID implements the Service interface. // ID implements the Service interface.
@ -95,6 +100,9 @@ func (e *Etcd) PreFunc(ctx context.Context, r runtime.Runtime) (err error) {
return fmt.Errorf("failed to pull image %q: %w", r.Config().Cluster().Etcd().Image(), err) return fmt.Errorf("failed to pull image %q: %w", r.Config().Cluster().Etcd().Image(), err)
} }
// Clear any previously set learner member ID
e.learnerMemberID = 0
switch t := r.Config().Machine().Type(); t { switch t := r.Config().Machine().Type(); t {
case machine.TypeInit: case machine.TypeInit:
return e.argsForInit(ctx, r) return e.argsForInit(ctx, r)
@ -111,6 +119,10 @@ func (e *Etcd) PreFunc(ctx context.Context, r runtime.Runtime) (err error) {
// PostFunc implements the Service interface. // PostFunc implements the Service interface.
func (e *Etcd) PostFunc(r runtime.Runtime, state events.ServiceState) (err error) { func (e *Etcd) PostFunc(r runtime.Runtime, state events.ServiceState) (err error) {
if e.promoteCtxCancel != nil {
e.promoteCtxCancel()
}
if e.client != nil { if e.client != nil {
e.client.Close() //nolint:errcheck e.client.Close() //nolint:errcheck
} }
@ -157,6 +169,20 @@ func (e *Etcd) Runner(r runtime.Runtime) (runner.Runner, error) {
env = append(env, "ETCD_CIPHER_SUITES=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305") //nolint:lll env = append(env, "ETCD_CIPHER_SUITES=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305") //nolint:lll
if e.learnerMemberID != 0 {
var promoteCtx context.Context
promoteCtx, e.promoteCtxCancel = context.WithCancel(context.Background())
go func() {
if err := promoteMember(promoteCtx, r, e.learnerMemberID); err != nil && !errors.Is(err, context.Canceled) {
log.Printf("failed promoting member: %s", err)
} else if err == nil {
log.Printf("successfully promoted etcd member")
}
}()
}
return restart.New(containerd.NewRunner( return restart.New(containerd.NewRunner(
r.Config().Debug(), r.Config().Debug(),
&args, &args,
@ -304,7 +330,7 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri
} }
} }
add, err := client.MemberAdd(ctx, addrs) add, err := client.MemberAddAsLearner(ctx, addrs)
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("error adding member: %w", err) return nil, 0, fmt.Errorf("error adding member: %w", err)
} }
@ -317,7 +343,9 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri
return list, add.Member.ID, nil return list, add.Member.ID, nil
} }
func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string) (initial string, err error) { func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string) (initial string, learnerMemberID uint64, err error) {
var id uint64
err = retry.Constant(10*time.Minute, err = retry.Constant(10*time.Minute,
retry.WithUnits(3*time.Second), retry.WithUnits(3*time.Second),
retry.WithJitter(time.Second), retry.WithJitter(time.Second),
@ -326,7 +354,6 @@ func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string
var ( var (
peerAddrs = []string{"https://" + net.FormatAddress(ip) + ":2380"} peerAddrs = []string{"https://" + net.FormatAddress(ip) + ":2380"}
resp *clientv3.MemberListResponse resp *clientv3.MemberListResponse
id uint64
) )
attemptCtx, attemptCtxCancel := context.WithTimeout(ctx, 30*time.Second) attemptCtx, attemptCtxCancel := context.WithTimeout(ctx, 30*time.Second)
@ -362,10 +389,10 @@ func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string
}) })
if err != nil { if err != nil {
return "", fmt.Errorf("failed to build cluster arguments: %w", err) return "", 0, fmt.Errorf("failed to build cluster arguments: %w", err)
} }
return initial, nil return initial, id, nil
} }
//nolint:gocyclo //nolint:gocyclo
@ -441,7 +468,7 @@ func (e *Etcd) argsForInit(ctx context.Context, r runtime.Runtime) error {
if upgraded { if upgraded {
denyListArgs.Set("initial-cluster-state", "existing") denyListArgs.Set("initial-cluster-state", "existing")
initialCluster, err = buildInitialCluster(ctx, r, hostname, primaryAddr) initialCluster, e.learnerMemberID, err = buildInitialCluster(ctx, r, hostname, primaryAddr)
if err != nil { if err != nil {
return err return err
} }
@ -534,7 +561,7 @@ func (e *Etcd) argsForControlPlane(ctx context.Context, r runtime.Runtime) error
if e.Bootstrap { if e.Bootstrap {
initialCluster = fmt.Sprintf("%s=https://%s:2380", hostname, net.FormatAddress(primaryAddr)) initialCluster = fmt.Sprintf("%s=https://%s:2380", hostname, net.FormatAddress(primaryAddr))
} else { } else {
initialCluster, err = buildInitialCluster(ctx, r, hostname, primaryAddr) initialCluster, e.learnerMemberID, err = buildInitialCluster(ctx, r, hostname, primaryAddr)
if err != nil { if err != nil {
return fmt.Errorf("failed to build initial etcd cluster: %w", err) return fmt.Errorf("failed to build initial etcd cluster: %w", err)
} }
@ -591,6 +618,27 @@ func (e *Etcd) recoverFromSnapshot(hostname, primaryAddr string) error {
return nil return nil
} }
// promoteMember asks the etcd cluster to promote the learner member identified by memberID.
//
// The attempt is wrapped in a constant-interval retry configured with a 10-minute budget,
// 10-second units and 1-second jitter, with error logging enabled. Each attempt builds a new
// etcd client from the control plane IPs and closes it when the attempt finishes.
//
// Both client construction errors and promote errors are returned via retry.ExpectedError
// (presumably marking them as retryable — confirm against the retry package).
// The function returns whatever the retry loop returns, including a context error if ctx is canceled.
func promoteMember(ctx context.Context, r runtime.Runtime, memberID uint64) error {
// Keep trying to promote the member until it succeeds: the promote call is expected to
// fail until the learner has caught up with the leader.
return retry.Constant(10*time.Minute,
retry.WithUnits(10*time.Second),
retry.WithJitter(time.Second),
retry.WithErrorLogging(true),
).RetryWithContext(ctx, func(ctx context.Context) error {
client, err := etcd.NewClientFromControlPlaneIPs(ctx, r.Config().Cluster().CA(), r.Config().Cluster().Endpoint())
if err != nil {
return retry.ExpectedError(err)
}
defer client.Close() //nolint:errcheck
_, err = client.MemberPromote(ctx, memberID)
return retry.ExpectedError(err)
})
}
// IsDirEmpty checks if a directory is empty or not. // IsDirEmpty checks if a directory is empty or not.
func IsDirEmpty(name string) (bool, error) { func IsDirEmpty(name string) (bool, error) {
f, err := os.Open(name) f, err := os.Open(name)

View File

@ -7321,6 +7321,8 @@ type EtcdMember struct {
PeerUrls []string `protobuf:"bytes,4,rep,name=peer_urls,json=peerUrls,proto3" json:"peer_urls,omitempty"` PeerUrls []string `protobuf:"bytes,4,rep,name=peer_urls,json=peerUrls,proto3" json:"peer_urls,omitempty"`
// the list of URLs the member exposes to the cluster for communication. // the list of URLs the member exposes to the cluster for communication.
ClientUrls []string `protobuf:"bytes,5,rep,name=client_urls,json=clientUrls,proto3" json:"client_urls,omitempty"` ClientUrls []string `protobuf:"bytes,5,rep,name=client_urls,json=clientUrls,proto3" json:"client_urls,omitempty"`
// learner flag
IsLearner bool `protobuf:"varint,6,opt,name=is_learner,json=isLearner,proto3" json:"is_learner,omitempty"`
} }
func (x *EtcdMember) Reset() { func (x *EtcdMember) Reset() {
@ -7383,6 +7385,13 @@ func (x *EtcdMember) GetClientUrls() []string {
return nil return nil
} }
// GetIsLearner returns the IsLearner field of the member.
// It is safe to call on a nil receiver, in which case it returns false.
func (x *EtcdMember) GetIsLearner() bool {
if x != nil {
return x.IsLearner
}
return false
}
// EtcdMembers contains the list of members registered on the host. // EtcdMembers contains the list of members registered on the host.
type EtcdMembers struct { type EtcdMembers struct {
state protoimpl.MessageState state protoimpl.MessageState
@ -9474,15 +9483,17 @@ var file_machine_machine_proto_rawDesc = []byte{
0x73, 0x22, 0x38, 0x0a, 0x15, 0x45, 0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x4c, 0x73, 0x22, 0x38, 0x0a, 0x15, 0x45, 0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x4c,
0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x71, 0x75, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x71, 0x75,
0x65, 0x72, 0x79, 0x5f, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x65, 0x72, 0x79, 0x5f, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52,
0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x22, 0x76, 0x0a, 0x0a, 0x45, 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x22, 0x95, 0x01, 0x0a, 0x0a,
0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x45, 0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64,
0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x68, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x68, 0x6f,
0x74, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x68, 0x6f, 0x73, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x68, 0x6f,
0x74, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x75, 0x72, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x75,
0x6c, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x70, 0x65, 0x65, 0x72, 0x55, 0x72, 0x72, 0x6c, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x70, 0x65, 0x65, 0x72, 0x55,
0x6c, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x75, 0x72, 0x6c, 0x72, 0x6c, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x75, 0x72,
0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x55, 0x6c, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74,
0x72, 0x6c, 0x73, 0x22, 0x91, 0x01, 0x0a, 0x0b, 0x45, 0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62, 0x55, 0x72, 0x6c, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x69, 0x73, 0x5f, 0x6c, 0x65, 0x61, 0x72, 0x6e,
0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x69, 0x73, 0x4c, 0x65, 0x61, 0x72,
0x6e, 0x65, 0x72, 0x22, 0x91, 0x01, 0x0a, 0x0b, 0x45, 0x74, 0x63, 0x64, 0x4d, 0x65, 0x6d, 0x62,
0x65, 0x72, 0x73, 0x12, 0x2c, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x65, 0x72, 0x73, 0x12, 0x2c, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18,
0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2e, 0x4d, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2e, 0x4d,
0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74,

View File

@ -6600,6 +6600,16 @@ func (m *EtcdMember) MarshalToSizedBufferVT(dAtA []byte) (int, error) {
i -= len(m.unknownFields) i -= len(m.unknownFields)
copy(dAtA[i:], m.unknownFields) copy(dAtA[i:], m.unknownFields)
} }
if m.IsLearner {
i--
if m.IsLearner {
dAtA[i] = 1
} else {
dAtA[i] = 0
}
i--
dAtA[i] = 0x30
}
if len(m.ClientUrls) > 0 { if len(m.ClientUrls) > 0 {
for iNdEx := len(m.ClientUrls) - 1; iNdEx >= 0; iNdEx-- { for iNdEx := len(m.ClientUrls) - 1; iNdEx >= 0; iNdEx-- {
i -= len(m.ClientUrls[iNdEx]) i -= len(m.ClientUrls[iNdEx])
@ -10642,6 +10652,9 @@ func (m *EtcdMember) SizeVT() (n int) {
n += 1 + l + sov(uint64(l)) n += 1 + l + sov(uint64(l))
} }
} }
if m.IsLearner {
n += 2
}
if m.unknownFields != nil { if m.unknownFields != nil {
n += len(m.unknownFields) n += len(m.unknownFields)
} }
@ -26123,6 +26136,26 @@ func (m *EtcdMember) UnmarshalVT(dAtA []byte) error {
} }
m.ClientUrls = append(m.ClientUrls, string(dAtA[iNdEx:postIndex])) m.ClientUrls = append(m.ClientUrls, string(dAtA[iNdEx:postIndex]))
iNdEx = postIndex iNdEx = postIndex
case 6:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field IsLearner", wireType)
}
var v int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflow
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
v |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
m.IsLearner = bool(v != 0)
default: default:
iNdEx = preIndex iNdEx = preIndex
skippy, err := skip(dAtA[iNdEx:]) skippy, err := skip(dAtA[iNdEx:])

View File

@ -1023,6 +1023,7 @@ EtcdMember describes a single etcd member.
| hostname | [string](#string) | | human-readable name of the member. | | hostname | [string](#string) | | human-readable name of the member. |
| peer_urls | [string](#string) | repeated | the list of URLs the member exposes to the cluster for communication. | | peer_urls | [string](#string) | repeated | the list of URLs the member exposes to the cluster for communication. |
| client_urls | [string](#string) | repeated | the list of URLs the member exposes to clients for communication. | | client_urls | [string](#string) | repeated | the list of URLs the member exposes to clients for communication. |
| is_learner | [bool](#bool) | | learner flag |