mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: reject member removal that breaks the current active quorum
This commit is contained in:
parent
06e2338108
commit
a1ce07a321
@ -844,10 +844,9 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) erro
|
||||
}
|
||||
|
||||
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
|
||||
if s.Cfg.StrictReconfigCheck && !s.cluster.IsReadyToRemoveMember(id) {
|
||||
// If s.cfg.StrictReconfigCheck is false, it means the option --strict-reconfig-check isn't passed to etcd.
|
||||
// In such a case removing a member is allowed unconditionally
|
||||
return ErrNotEnoughStartedMembers
|
||||
// by default StrictReconfigCheck is enabled; reject removal if leads to quorum loss
|
||||
if err := s.mayRemoveMember(types.ID(id)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cc := raftpb.ConfChange{
|
||||
@ -857,6 +856,32 @@ func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
|
||||
return s.configure(ctx, cc)
|
||||
}
|
||||
|
||||
func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
||||
if !s.Cfg.StrictReconfigCheck {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !s.cluster.IsReadyToRemoveMember(uint64(id)) {
|
||||
plog.Warningf("not enough started members, rejecting remove member %s", id)
|
||||
return ErrNotEnoughStartedMembers
|
||||
}
|
||||
|
||||
// downed member is safe to remove since it's not part of the active quorum
|
||||
if t := s.r.transport.ActiveSince(id); id != s.ID() && t.IsZero() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// protect quorum if some members are down
|
||||
m := s.cluster.Members()
|
||||
active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), m)
|
||||
if (active - 1) < 1+((len(m)-1)/2) {
|
||||
plog.Warningf("reconfigure breaks active quorum, rejecting remove member %s", id)
|
||||
return ErrUnhealthy
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) error {
|
||||
b, err := json.Marshal(memb)
|
||||
if err != nil {
|
||||
|
@ -25,13 +25,7 @@ import (
|
||||
// isConnectedToQuorumSince checks whether the local member is connected to the
|
||||
// quorum of the cluster since the given time.
|
||||
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
||||
var connectedNum int
|
||||
for _, m := range members {
|
||||
if m.ID == self || isConnectedSince(transport, since, m.ID) {
|
||||
connectedNum++
|
||||
}
|
||||
}
|
||||
return connectedNum >= (len(members)+1)/2
|
||||
return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
|
||||
}
|
||||
|
||||
// isConnectedSince checks whether the local member is connected to the
|
||||
@ -44,10 +38,17 @@ func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote ty
|
||||
// isConnectedFullySince checks whether the local member is connected to all
|
||||
// members in the cluster since the given time.
|
||||
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
||||
return numConnectedSince(transport, since, self, members) == len(members)
|
||||
}
|
||||
|
||||
// numConnectedSince counts how many members are connected to the local member
|
||||
// since the given time.
|
||||
func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
|
||||
connectedNum := 0
|
||||
for _, m := range members {
|
||||
if m.ID != self && !isConnectedSince(transport, since, m.ID) {
|
||||
return false
|
||||
if m.ID == self || isConnectedSince(transport, since, m.ID) {
|
||||
connectedNum++
|
||||
}
|
||||
}
|
||||
return true
|
||||
return connectedNum
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user