diff --git a/raft/raft.go b/raft/raft.go index 1b363a426..058de58fa 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -120,8 +120,18 @@ func (pr *Progress) waitSet(w int) { pr.Wait = w } func (pr *Progress) waitReset() { pr.Wait = 0 } func (pr *Progress) isUnreachable() bool { return pr.Unreachable } func (pr *Progress) reachable() { pr.Unreachable = false } -func (pr *Progress) unreachable() { pr.Unreachable = true } -func (pr *Progress) shouldWait() bool { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 } + +func (pr *Progress) unreachable() { + pr.Unreachable = true + // When on the optimistic append path, if the remote becomes unreachable, + // there is a high probability that it has lost a MsgApp. Fall back to the + // bad path (Next = Match + 1, only when Match is non-zero) to recover steadily. + if pr.Match != 0 { + pr.Next = pr.Match + 1 + } +} + +func (pr *Progress) shouldWait() bool { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 } func (pr *Progress) hasPendingSnapshot() bool { return pr.PendingSnapshot != 0 } func (pr *Progress) setPendingSnapshot(i uint64) { pr.PendingSnapshot = i } diff --git a/raft/raft_test.go b/raft/raft_test.go index 79d90d130..6c2f35989 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -1300,10 +1300,14 @@ func TestUnreachable(t *testing.T) { // set node 2 to unreachable r.prs[2].Match = 3 - r.prs[2].Next = 4 + r.prs[2].Next = 5 r.prs[2].Wait = 0 r.prs[2].unreachable() + if wnext := r.prs[2].Match + 1; r.prs[2].Next != wnext { + t.Errorf("next = %d, want %d", r.prs[2].Next, wnext) + } + for i := 0; i < 3; i++ { // node 2 is unreachable, we expect that raft will only send out one msgAPP per heartbeat timeout r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}})