mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: reject member removal that breaks the current active quorum
This commit is contained in:
parent
06e2338108
commit
a1ce07a321
@ -844,10 +844,9 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
|
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
|
||||||
if s.Cfg.StrictReconfigCheck && !s.cluster.IsReadyToRemoveMember(id) {
|
// by default StrictReconfigCheck is enabled; reject removal if leads to quorum loss
|
||||||
// If s.cfg.StrictReconfigCheck is false, it means the option --strict-reconfig-check isn't passed to etcd.
|
if err := s.mayRemoveMember(types.ID(id)); err != nil {
|
||||||
// In such a case removing a member is allowed unconditionally
|
return err
|
||||||
return ErrNotEnoughStartedMembers
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cc := raftpb.ConfChange{
|
cc := raftpb.ConfChange{
|
||||||
@ -857,6 +856,32 @@ func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
|
|||||||
return s.configure(ctx, cc)
|
return s.configure(ctx, cc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
||||||
|
if !s.Cfg.StrictReconfigCheck {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !s.cluster.IsReadyToRemoveMember(uint64(id)) {
|
||||||
|
plog.Warningf("not enough started members, rejecting remove member %s", id)
|
||||||
|
return ErrNotEnoughStartedMembers
|
||||||
|
}
|
||||||
|
|
||||||
|
// downed member is safe to remove since it's not part of the active quorum
|
||||||
|
if t := s.r.transport.ActiveSince(id); id != s.ID() && t.IsZero() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// protect quorum if some members are down
|
||||||
|
m := s.cluster.Members()
|
||||||
|
active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), m)
|
||||||
|
if (active - 1) < 1+((len(m)-1)/2) {
|
||||||
|
plog.Warningf("reconfigure breaks active quorum, rejecting remove member %s", id)
|
||||||
|
return ErrUnhealthy
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) error {
|
func (s *EtcdServer) UpdateMember(ctx context.Context, memb membership.Member) error {
|
||||||
b, err := json.Marshal(memb)
|
b, err := json.Marshal(memb)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -25,13 +25,7 @@ import (
|
|||||||
// isConnectedToQuorumSince checks whether the local member is connected to the
|
// isConnectedToQuorumSince checks whether the local member is connected to the
|
||||||
// quorum of the cluster since the given time.
|
// quorum of the cluster since the given time.
|
||||||
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
||||||
var connectedNum int
|
return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
|
||||||
for _, m := range members {
|
|
||||||
if m.ID == self || isConnectedSince(transport, since, m.ID) {
|
|
||||||
connectedNum++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return connectedNum >= (len(members)+1)/2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// isConnectedSince checks whether the local member is connected to the
|
// isConnectedSince checks whether the local member is connected to the
|
||||||
@ -44,10 +38,17 @@ func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote ty
|
|||||||
// isConnectedFullySince checks whether the local member is connected to all
|
// isConnectedFullySince checks whether the local member is connected to all
|
||||||
// members in the cluster since the given time.
|
// members in the cluster since the given time.
|
||||||
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
|
||||||
|
return numConnectedSince(transport, since, self, members) == len(members)
|
||||||
|
}
|
||||||
|
|
||||||
|
// numConnectedSince counts how many members are connected to the local member
|
||||||
|
// since the given time.
|
||||||
|
func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
|
||||||
|
connectedNum := 0
|
||||||
for _, m := range members {
|
for _, m := range members {
|
||||||
if m.ID != self && !isConnectedSince(transport, since, m.ID) {
|
if m.ID == self || isConnectedSince(transport, since, m.ID) {
|
||||||
return false
|
connectedNum++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true
|
return connectedNum
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user