Fix member removal failure.

Signed-off-by: Max Neverov <neverov.max@gmail.com>
This commit is contained in:
Max Neverov 2024-04-15 15:56:59 +02:00
parent a7f5d4b4e4
commit 3b16aae947
3 changed files with 7 additions and 7 deletions

View File

@ -86,7 +86,7 @@ const (
StoreKeysPrefix = "/1"
// HealthInterval is the minimum time the cluster should be healthy
// before accepting add member requests.
// before accepting add and delete member requests.
HealthInterval = 5 * time.Second
purgeFileInterval = 30 * time.Second
@ -1592,14 +1592,13 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
}
// protect quorum if some members are down
m := s.cluster.VotingMembers()
active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.MemberID(), m)
if (active - 1) < 1+((len(m)-1)/2) {
since := time.Now().Add(-HealthInterval)
if !isConnectedToQuorumSince(s.r.transport, since, s.MemberID(), s.cluster.Members()) {
lg.Warn(
"rejecting member remove request; local member has not been connected to all peers, reconfigure breaks active quorum",
zap.String("local-member-id", s.MemberID().String()),
zap.String("requested-member-remove", id.String()),
zap.Int("active-peers", active),
zap.Int("active-peers", numConnectedSince(s.r.transport, since, s.MemberID(), s.cluster.Members())),
zap.Error(errors.ErrUnhealthy),
)
return errors.ErrUnhealthy

View File

@ -33,7 +33,7 @@ func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, s
// remote member since the given time.
func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote types.ID) bool {
t := transport.ActiveSince(remote)
return !t.IsZero() && t.Before(since)
return !t.IsZero() && !t.After(since)
}
// isConnectedFullySince checks whether the local member is connected to all

View File

@ -207,7 +207,8 @@ func TestMemberRemove(t *testing.T) {
testutils.ExecuteUntil(ctx, t, func() {
if quorumTc.waitForQuorum {
time.Sleep(etcdserver.HealthInterval)
// wait for health interval + leader election
time.Sleep(etcdserver.HealthInterval + 2*time.Second)
}
memberID, clusterID := memberToRemove(ctx, t, cc, c.ClusterSize)