From b07be74a82919233e50d157288dd86fdfe763148 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Tue, 23 Sep 2014 23:09:21 -0700 Subject: [PATCH 1/2] raft: stop tickElection when the node is not in peer list This prevents the bug like this: 1. a node sends join to a cluster and succeeds 2. it starts with empty peers and waits for sync, but it have not received anything 3. election timeout passes, and it promotes itself to leader 4. it commits some log entry 5. its log conflicts with the cluster's --- raft/raft.go | 6 ++++++ raft/raft_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/raft/raft.go b/raft/raft.go index eb69f903d..35ef998ee 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -265,6 +265,12 @@ func (r *raft) appendEntry(e pb.Entry) { // tickElection is ran by followers and candidates after r.electionTimeout. func (r *raft) tickElection() { + // promotable indicates whether state machine can be promoted to leader, + // which is true when its own id is in progress list. + if _, promotable := r.prs[r.id]; !promotable { + r.elapsed = 0 + return + } r.elapsed++ // TODO (xiangli): elctionTimeout should be randomized. if r.elapsed > r.electionTimeout { diff --git a/raft/raft_test.go b/raft/raft_test.go index 106d4fd34..ca117f26c 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -1053,6 +1053,37 @@ func TestRemoveNode(t *testing.T) { } } +func TestTickElectionElapsed(t *testing.T) { + electionTimeout := 10 + tests := []struct { + promotable bool + e int + we int + }{ + {true, 0, 1}, + {true, electionTimeout - 1, electionTimeout}, + {true, electionTimeout, 0}, + {false, 0, 0}, + {false, 1, 0}, + } + for i, tt := range tests { + r := &raft{ + id: 1, + raftLog: newLog(), + prs: make(map[int64]*progress), + electionTimeout: electionTimeout, + elapsed: tt.e, + } + if tt.promotable { + r.prs[r.id] = &progress{} + } + r.tickElection() + if r.elapsed != tt.we { + t.Errorf("#%d: elapsed = %d, want %d", i, r.elapsed, tt.we) + } + } +} + func ents(terms ...int64) *raft { ents := []pb.Entry{{}} for _, term := range terms { From 1ca03d8e9d3168552c9d77e16cb60aaa3bc5f86f Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Wed, 24 Sep 2014 10:19:41 -0700 Subject: [PATCH 2/2] raft: move logic to separate func --- raft/raft.go | 11 ++++++++--- raft/raft_test.go | 35 +++++++++++++---------------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/raft/raft.go b/raft/raft.go index 35ef998ee..8af1ae4eb 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -265,9 +265,7 @@ func (r *raft) appendEntry(e pb.Entry) { // tickElection is ran by followers and candidates after r.electionTimeout. func (r *raft) tickElection() { - // promotable indicates whether state machine can be promoted to leader, - // which is true when its own id is in progress list. - if _, promotable := r.prs[r.id]; !promotable { + if !r.promotable() { r.elapsed = 0 return } @@ -536,6 +534,13 @@ func (r *raft) delProgress(id int64) { delete(r.prs, id) } +// promotable indicates whether state machine can be promoted to leader, +// which is true when its own id is in progress list. +func (r *raft) promotable() bool { + _, ok := r.prs[r.id] + return ok +} + func (r *raft) loadEnts(ents []pb.Entry) { r.raftLog.load(ents) } diff --git a/raft/raft_test.go b/raft/raft_test.go index ca117f26c..f343cba65 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -1053,33 +1053,24 @@ func TestRemoveNode(t *testing.T) { } } -func TestTickElectionElapsed(t *testing.T) { - electionTimeout := 10 +func TestPromotable(t *testing.T) { + id := int64(1) tests := []struct { - promotable bool - e int - we int + peers []int64 + wp bool }{ - {true, 0, 1}, - {true, electionTimeout - 1, electionTimeout}, - {true, electionTimeout, 0}, - {false, 0, 0}, - {false, 1, 0}, + {[]int64{1}, true}, + {[]int64{1, 2, 3}, true}, + {[]int64{}, false}, + {[]int64{2, 3}, false}, } for i, tt := range tests { - r := &raft{ - id: 1, - raftLog: newLog(), - prs: make(map[int64]*progress), - electionTimeout: electionTimeout, - elapsed: tt.e, + r := &raft{id: id, prs: make(map[int64]*progress)} + for _, id := range tt.peers { + r.prs[id] = &progress{} } - if tt.promotable { - r.prs[r.id] = &progress{} - } - r.tickElection() - if r.elapsed != tt.we { - t.Errorf("#%d: elapsed = %d, want %d", i, r.elapsed, tt.we) + if g := r.promotable(); g != tt.wp { + t.Errorf("#%d: promotable = %v, want %v", i, g, tt.wp) } } }