etcdserver: reject member removal that breaks the current active quorum

This commit is contained in:
Anthony Romano 2016-08-10 16:29:03 -07:00
parent 06e2338108
commit a1ce07a321
2 changed files with 40 additions and 14 deletions

View File

@ -844,10 +844,9 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) erro
} }
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error { func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
if s.Cfg.StrictReconfigCheck && !s.cluster.IsReadyToRemoveMember(id) { // by default StrictReconfigCheck is enabled; reject removal if leads to quorum loss
// If s.cfg.StrictReconfigCheck is false, it means the option --strict-reconfig-check isn't passed to etcd. if err := s.mayRemoveMember(types.ID(id)); err != nil {
// In such a case removing a member is allowed unconditionally return err
return ErrNotEnoughStartedMembers
} }
cc := raftpb.ConfChange{ cc := raftpb.ConfChange{
@ -857,6 +856,32 @@ func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
return s.configure(ctx, cc) return s.configure(ctx, cc)
} }
func (s *EtcdServer) mayRemoveMember(id types.ID) error {
if !s.Cfg.StrictReconfigCheck {
return nil
}
if !s.cluster.IsReadyToRemoveMember(uint64(id)) {
plog.Warningf("not enough started members, rejecting remove member %s", id)
return ErrNotEnoughStartedMembers
}
// downed member is safe to remove since it's not part of the active quorum
if t := s.r.transport.ActiveSince(id); id != s.ID() && t.IsZero() {
return nil
}
// protect quorum if some members are down
m := s.cluster.Members()
active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), m)
if (active - 1) < 1+((len(m)-1)/2) {
plog.Warningf("reconfigure breaks active quorum, rejecting remove member %s", id)
return ErrUnhealthy
}
return nil
}
func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) error { func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) error {
b, err := json.Marshal(memb) b, err := json.Marshal(memb)
if err != nil { if err != nil {

View File

@ -25,13 +25,7 @@ import (
// isConnectedToQuorumSince checks whether the local member is connected to the // isConnectedToQuorumSince checks whether the local member is connected to the
// quorum of the cluster since the given time. // quorum of the cluster since the given time.
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool { func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
var connectedNum int return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
for _, m := range members {
if m.ID == self || isConnectedSince(transport, since, m.ID) {
connectedNum++
}
}
return connectedNum >= (len(members)+1)/2
} }
// isConnectedSince checks whether the local member is connected to the // isConnectedSince checks whether the local member is connected to the
@ -44,10 +38,17 @@ func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote ty
// isConnectedFullySince checks whether the local member is connected to all // isConnectedFullySince checks whether the local member is connected to all
// members in the cluster since the given time. // members in the cluster since the given time.
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool { func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
for _, m := range members { return numConnectedSince(transport, since, self, members) == len(members)
if m.ID != self && !isConnectedSince(transport, since, m.ID) { }
return false
} // numConnectedSince counts how many members are connected to the local member
} // since the given time.
return true func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
connectedNum := 0
for _, m := range members {
if m.ID == self || isConnectedSince(transport, since, m.ID) {
connectedNum++
}
}
return connectedNum
} }