diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 541cf797c..2f88bdec3 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -86,7 +86,7 @@ const ( StoreKeysPrefix = "/1" // HealthInterval is the minimum time the cluster should be healthy - // before accepting add member requests. + // before accepting add and delete member requests. HealthInterval = 5 * time.Second purgeFileInterval = 30 * time.Second @@ -1592,14 +1592,13 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error { } // protect quorum if some members are down - m := s.cluster.VotingMembers() - active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.MemberID(), m) - if (active - 1) < 1+((len(m)-1)/2) { + since := time.Now().Add(-HealthInterval) + if !isConnectedToQuorumSince(s.r.transport, since, s.MemberID(), s.cluster.Members()) { lg.Warn( "rejecting member remove request; local member has not been connected to all peers, reconfigure breaks active quorum", zap.String("local-member-id", s.MemberID().String()), zap.String("requested-member-remove", id.String()), - zap.Int("active-peers", active), + zap.Int("active-peers", numConnectedSince(s.r.transport, since, s.MemberID(), s.cluster.Members())), zap.Error(errors.ErrUnhealthy), ) return errors.ErrUnhealthy diff --git a/server/etcdserver/util.go b/server/etcdserver/util.go index fbba5491b..67ee3facc 100644 --- a/server/etcdserver/util.go +++ b/server/etcdserver/util.go @@ -33,7 +33,7 @@ func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, s // remote member since the given time. func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote types.ID) bool { t := transport.ActiveSince(remote) - return !t.IsZero() && t.Before(since) + return !t.IsZero() && !t.After(since) } // isConnectedFullySince checks whether the local member is connected to all diff --git a/tests/common/member_test.go b/tests/common/member_test.go index 1f2687c13..a6cb0bd16 100644 --- a/tests/common/member_test.go +++ b/tests/common/member_test.go @@ -207,7 +207,8 @@ func TestMemberRemove(t *testing.T) { testutils.ExecuteUntil(ctx, t, func() { if quorumTc.waitForQuorum { - time.Sleep(etcdserver.HealthInterval) + // wait for health interval + leader election + time.Sleep(etcdserver.HealthInterval + 2*time.Second) } memberID, clusterID := memberToRemove(ctx, t, cc, c.ClusterSize)