mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
raft: proactively probe newly added followers
When the leader applied a new configuration that added voters, it would not immediately probe these voters, delaying when they would be caught up. I noticed this while writing an interaction-driven test, which has now been cleaned up and completed.
This commit is contained in:
parent
3d6721f751
commit
4e19150676
12
raft/raft.go
12
raft/raft.go
@ -1527,10 +1527,18 @@ func (r *raft) switchToConfig(cfg tracker.Config, prs tracker.ProgressMap) pb.Co
|
||||
if r.state != StateLeader || len(cs.Voters) == 0 {
|
||||
return cs
|
||||
}
|
||||
|
||||
if r.maybeCommit() {
|
||||
// The quorum size may have been reduced (but not to zero), so see if
|
||||
// any pending entries can be committed.
|
||||
// If the configuration change means that more entries are committed now,
|
||||
// broadcast/append to everyone in the updated config.
|
||||
r.bcastAppend()
|
||||
} else {
|
||||
// Otherwise, still probe the newly added replicas; there's no reason to
|
||||
// let them wait out a heartbeat interval (or the next incoming
|
||||
// proposal).
|
||||
r.prs.Visit(func(id uint64, pr *tracker.Progress) {
|
||||
r.maybeSendAppend(id, false /* sendIfEmpty */)
|
||||
})
|
||||
}
|
||||
// If the the leadTransferee was removed, abort the leadership transfer.
|
||||
if _, tOK := r.prs.Progress[r.leadTransferee]; !tOK && r.leadTransferee != 0 {
|
||||
|
194
raft/testdata/confchange_upreplicate_1_to_3_v2_auto.txt
vendored
Normal file
194
raft/testdata/confchange_upreplicate_1_to_3_v2_auto.txt
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
# Run a V2 membership change that adds two voters at once and auto-leaves the
|
||||
# joint configuration.
|
||||
|
||||
# Bootstrap n1.
|
||||
add-nodes 1 voters=(1) index=2
|
||||
----
|
||||
INFO 1 switched to configuration voters=(1)
|
||||
INFO 1 became follower at term 0
|
||||
INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1]
|
||||
|
||||
campaign 1
|
||||
----
|
||||
INFO 1 is starting a new election at term 0
|
||||
INFO 1 became candidate at term 1
|
||||
INFO 1 received MsgVoteResp from 1 at term 1
|
||||
INFO 1 became leader at term 1
|
||||
|
||||
propose-conf-change 1
|
||||
v2 v3
|
||||
----
|
||||
ok
|
||||
|
||||
# Add two "empty" nodes to the cluster, n2 and n3.
|
||||
add-nodes 2
|
||||
----
|
||||
INFO 2 switched to configuration voters=()
|
||||
INFO 2 became follower at term 0
|
||||
INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]
|
||||
INFO 3 switched to configuration voters=()
|
||||
INFO 3 became follower at term 0
|
||||
INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]
|
||||
|
||||
# n1 immediately gets to commit & apply the conf change using only itself. We see that
|
||||
# it starts transitioning out of that joint configuration (though we will only see that
|
||||
# proposal in the next ready handling loop, when it is emitted).
|
||||
process-ready 1
|
||||
----
|
||||
INFO 1 switched to configuration voters=(1 2 3)&&(1) autoleave
|
||||
INFO initiating automatic transition out of joint configuration voters=(1 2 3)&&(1) autoleave
|
||||
Ready MustSync=true:
|
||||
Lead:1 State:StateLeader
|
||||
HardState Term:1 Vote:1 Commit:4
|
||||
Entries:
|
||||
1/3 EntryNormal ""
|
||||
1/4 EntryConfChangeV2 v2 v3
|
||||
CommittedEntries:
|
||||
1/3 EntryNormal ""
|
||||
1/4 EntryConfChangeV2 v2 v3
|
||||
|
||||
# n1 immediately probes n2 and n3.
|
||||
stabilize 1
|
||||
----
|
||||
> 1 handling Ready
|
||||
Ready MustSync=true:
|
||||
Entries:
|
||||
1/5 EntryConfChangeV2
|
||||
Messages:
|
||||
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
|
||||
1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
|
||||
|
||||
# First, play out the whole interaction between n1 and n2. We see n1's probe to
|
||||
# n2 get rejected (since n2 needs a snapshot); the snapshot is delivered at which
|
||||
# point n2 switches to the correct config, and n1 catches it up. This notably
|
||||
# includes the empty conf change which gets committed and applied by both and
|
||||
# which transitions them out of their joint configuration into the final one (1 2 3).
|
||||
stabilize 1 2
|
||||
----
|
||||
> delivering messages
|
||||
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
|
||||
INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1]
|
||||
INFO 2 became follower at term 1
|
||||
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
|
||||
> 2 handling Ready
|
||||
Ready MustSync=true:
|
||||
Lead:1 State:StateFollower
|
||||
HardState Term:1 Commit:0
|
||||
Messages:
|
||||
2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
|
||||
> delivering messages
|
||||
2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
|
||||
DEBUG 1 received MsgAppResp(MsgApp was rejected, lastindex: 0) from 2 for index 3
|
||||
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
|
||||
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
|
||||
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
|
||||
> 1 handling Ready
|
||||
Ready MustSync=false:
|
||||
Messages:
|
||||
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
|
||||
> delivering messages
|
||||
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
|
||||
INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1]
|
||||
INFO 2 switched to configuration voters=(1 2 3)&&(1)
|
||||
INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1]
|
||||
INFO 2 [commit: 4] restored snapshot [index: 4, term: 1]
|
||||
> 2 handling Ready
|
||||
Ready MustSync=false:
|
||||
HardState Term:1 Commit:4
|
||||
Snapshot Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
|
||||
Messages:
|
||||
2->1 MsgAppResp Term:1 Log:0/4
|
||||
> delivering messages
|
||||
2->1 MsgAppResp Term:1 Log:0/4
|
||||
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4]
|
||||
> 1 handling Ready
|
||||
Ready MustSync=false:
|
||||
Messages:
|
||||
1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2]
|
||||
> delivering messages
|
||||
1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2]
|
||||
> 2 handling Ready
|
||||
Ready MustSync=true:
|
||||
Entries:
|
||||
1/5 EntryConfChangeV2
|
||||
Messages:
|
||||
2->1 MsgAppResp Term:1 Log:0/5
|
||||
> delivering messages
|
||||
2->1 MsgAppResp Term:1 Log:0/5
|
||||
> 1 handling Ready
|
||||
INFO 1 switched to configuration voters=(1 2 3)
|
||||
Ready MustSync=false:
|
||||
HardState Term:1 Vote:1 Commit:5
|
||||
CommittedEntries:
|
||||
1/5 EntryConfChangeV2
|
||||
Messages:
|
||||
1->2 MsgApp Term:1 Log:1/5 Commit:5
|
||||
> delivering messages
|
||||
1->2 MsgApp Term:1 Log:1/5 Commit:5
|
||||
> 2 handling Ready
|
||||
INFO 2 switched to configuration voters=(1 2 3)
|
||||
Ready MustSync=false:
|
||||
HardState Term:1 Commit:5
|
||||
CommittedEntries:
|
||||
1/5 EntryConfChangeV2
|
||||
Messages:
|
||||
2->1 MsgAppResp Term:1 Log:0/5
|
||||
> delivering messages
|
||||
2->1 MsgAppResp Term:1 Log:0/5
|
||||
|
||||
# n3 immediately receives a snapshot in the final configuration.
|
||||
stabilize 1 3
|
||||
----
|
||||
> delivering messages
|
||||
1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
|
||||
INFO 3 [term: 0] received a MsgApp message with higher term from 1 [term: 1]
|
||||
INFO 3 became follower at term 1
|
||||
DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
|
||||
> 3 handling Ready
|
||||
Ready MustSync=true:
|
||||
Lead:1 State:StateFollower
|
||||
HardState Term:1 Commit:0
|
||||
Messages:
|
||||
3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
|
||||
> delivering messages
|
||||
3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
|
||||
DEBUG 1 received MsgAppResp(MsgApp was rejected, lastindex: 0) from 3 for index 3
|
||||
DEBUG 1 decreased progress of 3 to [StateProbe match=0 next=1]
|
||||
DEBUG 1 [firstindex: 3, commit: 5] sent snapshot[index: 5, term: 1] to 3 [StateProbe match=0 next=1]
|
||||
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=1 paused pendingSnap=5]
|
||||
> 1 handling Ready
|
||||
Ready MustSync=false:
|
||||
Messages:
|
||||
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
|
||||
> delivering messages
|
||||
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
|
||||
INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 5, term: 1]
|
||||
INFO 3 switched to configuration voters=(1 2 3)
|
||||
INFO 3 [commit: 5, lastindex: 5, lastterm: 1] restored snapshot [index: 5, term: 1]
|
||||
INFO 3 [commit: 5] restored snapshot [index: 5, term: 1]
|
||||
> 3 handling Ready
|
||||
Ready MustSync=false:
|
||||
HardState Term:1 Commit:5
|
||||
Snapshot Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
|
||||
Messages:
|
||||
3->1 MsgAppResp Term:1 Log:0/5
|
||||
> delivering messages
|
||||
3->1 MsgAppResp Term:1 Log:0/5
|
||||
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=5 next=6 paused pendingSnap=5]
|
||||
> 1 handling Ready
|
||||
Ready MustSync=false:
|
||||
Messages:
|
||||
1->3 MsgApp Term:1 Log:1/5 Commit:5
|
||||
> delivering messages
|
||||
1->3 MsgApp Term:1 Log:1/5 Commit:5
|
||||
> 3 handling Ready
|
||||
Ready MustSync=false:
|
||||
Messages:
|
||||
3->1 MsgAppResp Term:1 Log:0/5
|
||||
> delivering messages
|
||||
3->1 MsgAppResp Term:1 Log:0/5
|
||||
|
||||
# Nothing else happens.
|
||||
stabilize
|
||||
----
|
||||
ok
|
78
raft/testdata/confchange_v1.txt
vendored
78
raft/testdata/confchange_v1.txt
vendored
@ -1,78 +0,0 @@
|
||||
add-nodes 1 voters=(1) index=2
|
||||
----
|
||||
INFO 1 switched to configuration voters=(1)
|
||||
INFO 1 became follower at term 0
|
||||
INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1]
|
||||
|
||||
campaign 1
|
||||
----
|
||||
INFO 1 is starting a new election at term 0
|
||||
INFO 1 became candidate at term 1
|
||||
INFO 1 received MsgVoteResp from 1 at term 1
|
||||
INFO 1 became leader at term 1
|
||||
|
||||
propose-conf-change 1
|
||||
v2 v3
|
||||
----
|
||||
ok
|
||||
|
||||
add-nodes 2
|
||||
|
||||
process-ready 1
|
||||
----
|
||||
INFO 2 switched to configuration voters=()
|
||||
INFO 2 became follower at term 0
|
||||
INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]
|
||||
INFO 3 switched to configuration voters=()
|
||||
INFO 3 became follower at term 0
|
||||
INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]
|
||||
|
||||
stabilize 1
|
||||
----
|
||||
> 1 handling Ready
|
||||
INFO 1 switched to configuration voters=(1 2 3)&&(1) autoleave
|
||||
INFO initiating automatic transition out of joint configuration voters=(1 2 3)&&(1) autoleave
|
||||
Ready MustSync=true:
|
||||
Lead:1 State:StateLeader
|
||||
HardState Term:1 Vote:1 Commit:4
|
||||
Entries:
|
||||
1/3 EntryNormal ""
|
||||
1/4 EntryConfChangeV2 v2 v3
|
||||
CommittedEntries:
|
||||
1/3 EntryNormal ""
|
||||
1/4 EntryConfChangeV2 v2 v3
|
||||
> 1 handling Ready
|
||||
Ready MustSync=true:
|
||||
Entries:
|
||||
1/5 EntryConfChangeV2
|
||||
|
||||
# NB: this test is broken from here on because the leader doesn't propagate the
|
||||
# commit index proactively, see the buglet #11002.
|
||||
|
||||
stabilize 2
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 1
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 2
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 1
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 2
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 1
|
||||
----
|
||||
ok
|
||||
|
||||
stabilize 2
|
||||
----
|
||||
ok
|
Loading…
x
Reference in New Issue
Block a user