Merge pull request #14413 from tbg/raft-single-voter

raft: don't emit unstable CommittedEntries
Benjamin Wang 2022-09-22 08:43:37 +08:00 committed by GitHub
commit 31d9664cb5
17 changed files with 716 additions and 376 deletions
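The central behavioral change: a Ready no longer lists an entry under CommittedEntries while that entry is still unstable (i.e. not yet appended to Storage). Instead, the leader acks its own appended entries during Advance(), and the entries only come back as CommittedEntries in a later Ready. A minimal application-side sketch of the resulting contract (the helper name runReadyLoop and the process callback are illustrative only, not part of this PR):

func runReadyLoop(n Node, storage *MemoryStorage, process func(e raftpb.Entry)) {
	for {
		rd := <-n.Ready()
		// Persist unstable entries (plus HardState/Snapshot) before applying anything.
		if err := storage.Append(rd.Entries); err != nil {
			panic(err)
		}
		// With this change, everything handed out here is already stable as well as committed.
		for _, e := range rd.CommittedEntries {
			process(e)
		}
		n.Advance()
	}
}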


@@ -55,7 +55,7 @@ func mustTemp(pre, body string) string {
 }
 
 func ltoa(l *raftLog) string {
-	s := fmt.Sprintf("committed: %d\n", l.committed)
+	s := fmt.Sprintf("lastIndex: %d\n", l.lastIndex())
 	s += fmt.Sprintf("applied: %d\n", l.applied)
 	for i, e := range l.allEntries() {
 		s += fmt.Sprintf("#%d: %+v\n", i, e)


@@ -211,11 +211,7 @@ type Peer struct {
 	Context []byte
 }
 
-// StartNode returns a new Node given configuration and a list of raft peers.
-// It appends a ConfChangeAddNode entry for each given peer to the initial log.
-//
-// Peers must not be zero length; call RestartNode in that case.
-func StartNode(c *Config, peers []Peer) Node {
+func setupNode(c *Config, peers []Peer) *node {
 	if len(peers) == 0 {
 		panic("no peers given; use RestartNode instead")
 	}
@@ -229,11 +225,19 @@ func StartNode(c *Config, peers []Peer) Node {
 	}
 	n := newNode(rn)
-	go n.run()
 	return &n
 }
+
+// StartNode returns a new Node given configuration and a list of raft peers.
+// It appends a ConfChangeAddNode entry for each given peer to the initial log.
+//
+// Peers must not be zero length; call RestartNode in that case.
+func StartNode(c *Config, peers []Peer) Node {
+	n := setupNode(c, peers)
+	go n.run()
+	return n
+}
 
 // RestartNode is similar to StartNode but does not take a list of peers.
 // The current membership of the cluster will be restored from the Storage.
 // If the caller has an existing state machine, pass in the last log index that


@@ -35,6 +35,12 @@ import (
 func readyWithTimeout(n Node) Ready {
 	select {
 	case rd := <-n.Ready():
+		if nn, ok := n.(*nodeTestHarness); ok {
+			n = nn.node
+		}
+		if nn, ok := n.(*node); ok {
+			nn.rn.raft.logger.Infof("emitted ready: %s", DescribeReady(rd, nil))
+		}
 		return rd
 	case <-time.After(time.Second):
 		panic("timed out waiting for ready")
@@ -126,6 +132,10 @@ func TestNodeStepUnblock(t *testing.T) {
 func TestNodePropose(t *testing.T) {
 	var msgs []raftpb.Message
 	appendStep := func(r *raft, m raftpb.Message) error {
+		t.Log(DescribeMessage(m, nil))
+		if m.Type == raftpb.MsgAppResp {
+			return nil // injected by (*raft).advance
+		}
 		msgs = append(msgs, m)
 		return nil
 	}
@ -163,55 +173,6 @@ func TestNodePropose(t *testing.T) {
} }
} }
// TestNodeReadIndex ensures that node.ReadIndex sends the MsgReadIndex message to the underlying raft.
// It also ensures that ReadState can be read out through ready chan.
func TestNodeReadIndex(t *testing.T) {
var msgs []raftpb.Message
appendStep := func(r *raft, m raftpb.Message) error {
msgs = append(msgs, m)
return nil
}
wrs := []ReadState{{Index: uint64(1), RequestCtx: []byte("somedata")}}
s := newTestMemoryStorage(withPeers(1))
rn := newTestRawNode(1, 10, 1, s)
n := newNode(rn)
r := rn.raft
r.readStates = wrs
go n.run()
n.Campaign(context.TODO())
for {
rd := <-n.Ready()
if !reflect.DeepEqual(rd.ReadStates, wrs) {
t.Errorf("ReadStates = %v, want %v", rd.ReadStates, wrs)
}
s.Append(rd.Entries)
if rd.SoftState.Lead == r.id {
n.Advance()
break
}
n.Advance()
}
r.step = appendStep
wrequestCtx := []byte("somedata2")
n.ReadIndex(context.TODO(), wrequestCtx)
n.Stop()
if len(msgs) != 1 {
t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1)
}
if msgs[0].Type != raftpb.MsgReadIndex {
t.Errorf("msg type = %d, want %d", msgs[0].Type, raftpb.MsgReadIndex)
}
if !bytes.Equal(msgs[0].Entries[0].Data, wrequestCtx) {
t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, wrequestCtx)
}
}
// TestDisableProposalForwarding ensures that proposals are not forwarded to // TestDisableProposalForwarding ensures that proposals are not forwarded to
// the leader when DisableProposalForwarding is true. // the leader when DisableProposalForwarding is true.
func TestDisableProposalForwarding(t *testing.T) { func TestDisableProposalForwarding(t *testing.T) {
@ -308,6 +269,9 @@ func TestNodeReadIndexToOldLeader(t *testing.T) {
func TestNodeProposeConfig(t *testing.T) { func TestNodeProposeConfig(t *testing.T) {
var msgs []raftpb.Message var msgs []raftpb.Message
appendStep := func(r *raft, m raftpb.Message) error { appendStep := func(r *raft, m raftpb.Message) error {
if m.Type == raftpb.MsgAppResp {
return nil // injected by (*raft).advance
}
msgs = append(msgs, m) msgs = append(msgs, m)
return nil return nil
} }
@ -352,30 +316,34 @@ func TestNodeProposeConfig(t *testing.T) {
// not affect the later propose to add new node. // not affect the later propose to add new node.
func TestNodeProposeAddDuplicateNode(t *testing.T) { func TestNodeProposeAddDuplicateNode(t *testing.T) {
s := newTestMemoryStorage(withPeers(1)) s := newTestMemoryStorage(withPeers(1))
rn := newTestRawNode(1, 10, 1, s) cfg := newTestConfig(1, 10, 1, s)
n := newNode(rn) ctx, cancel, n := newNodeTestHarness(context.Background(), t, cfg)
go n.run() defer cancel()
n.Campaign(context.TODO()) n.Campaign(ctx)
rdyEntries := make([]raftpb.Entry, 0) allCommittedEntries := make([]raftpb.Entry, 0)
ticker := time.NewTicker(time.Millisecond * 100) ticker := time.NewTicker(time.Millisecond * 100)
defer ticker.Stop() defer ticker.Stop()
done := make(chan struct{}) goroutineStopped := make(chan struct{})
stop := make(chan struct{})
applyConfChan := make(chan struct{}) applyConfChan := make(chan struct{})
rd := readyWithTimeout(n)
s.Append(rd.Entries)
n.Advance()
go func() { go func() {
defer close(done) defer close(goroutineStopped)
for { for {
select { select {
case <-stop: case <-ctx.Done():
return return
case <-ticker.C: case <-ticker.C:
n.Tick() n.Tick()
case rd := <-n.Ready(): case rd := <-n.Ready():
t.Log(DescribeReady(rd, nil))
s.Append(rd.Entries) s.Append(rd.Entries)
applied := false applied := false
for _, e := range rd.Entries { for _, e := range rd.CommittedEntries {
rdyEntries = append(rdyEntries, e) allCommittedEntries = append(allCommittedEntries, e)
switch e.Type { switch e.Type {
case raftpb.EntryNormal: case raftpb.EntryNormal:
case raftpb.EntryConfChange: case raftpb.EntryConfChange:
@ -395,32 +363,31 @@ func TestNodeProposeAddDuplicateNode(t *testing.T) {
cc1 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} cc1 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
ccdata1, _ := cc1.Marshal() ccdata1, _ := cc1.Marshal()
n.ProposeConfChange(context.TODO(), cc1) n.ProposeConfChange(ctx, cc1)
<-applyConfChan <-applyConfChan
// try add the same node again // try add the same node again
n.ProposeConfChange(context.TODO(), cc1) n.ProposeConfChange(ctx, cc1)
<-applyConfChan <-applyConfChan
// the new node join should be ok // the new node join should be ok
cc2 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 2} cc2 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 2}
ccdata2, _ := cc2.Marshal() ccdata2, _ := cc2.Marshal()
n.ProposeConfChange(context.TODO(), cc2) n.ProposeConfChange(ctx, cc2)
<-applyConfChan <-applyConfChan
close(stop) cancel()
<-done <-goroutineStopped
if len(rdyEntries) != 4 { if len(allCommittedEntries) != 4 {
t.Errorf("len(entry) = %d, want %d, %v\n", len(rdyEntries), 4, rdyEntries) t.Errorf("len(entry) = %d, want %d, %v\n", len(allCommittedEntries), 4, allCommittedEntries)
} }
if !bytes.Equal(rdyEntries[1].Data, ccdata1) { if !bytes.Equal(allCommittedEntries[1].Data, ccdata1) {
t.Errorf("data = %v, want %v", rdyEntries[1].Data, ccdata1) t.Errorf("data = %v, want %v", allCommittedEntries[1].Data, ccdata1)
} }
if !bytes.Equal(rdyEntries[3].Data, ccdata2) { if !bytes.Equal(allCommittedEntries[3].Data, ccdata2) {
t.Errorf("data = %v, want %v", rdyEntries[3].Data, ccdata2) t.Errorf("data = %v, want %v", allCommittedEntries[3].Data, ccdata2)
} }
n.Stop()
} }
// TestBlockProposal ensures that node will block proposal when it does not // TestBlockProposal ensures that node will block proposal when it does not
@ -463,6 +430,10 @@ func TestNodeProposeWaitDropped(t *testing.T) {
t.Logf("dropping message: %v", m.String()) t.Logf("dropping message: %v", m.String())
return ErrProposalDropped return ErrProposalDropped
} }
if m.Type == raftpb.MsgAppResp {
// This is produced by raft internally, see (*raft).advance.
return nil
}
msgs = append(msgs, m) msgs = append(msgs, m)
return nil return nil
} }
@@ -495,7 +466,7 @@ func TestNodeProposeWaitDropped(t *testing.T) {
 	n.Stop()
 	if len(msgs) != 0 {
-		t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1)
+		t.Fatalf("len(msgs) = %d, want %d", len(msgs), 0)
 	}
 }
@@ -580,9 +551,6 @@ func TestReadyContainUpdates(t *testing.T) {
 // start with correct configuration change entries, and can accept and commit
 // proposals.
 func TestNodeStart(t *testing.T) {
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
 	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
 	ccdata, err := cc.Marshal()
 	if err != nil {
@@ -600,11 +568,17 @@ func TestNodeStart(t *testing.T) {
 			MustSync: true,
 		},
 		{
-			HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
+			HardState:        raftpb.HardState{Term: 2, Commit: 2, Vote: 1},
 			Entries:          []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
-			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
+			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 2, Data: nil}},
 			MustSync:         true,
 		},
+		{
+			HardState:        raftpb.HardState{Term: 2, Commit: 3, Vote: 1},
+			Entries:          nil,
+			CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}},
+			MustSync:         false,
+		},
 	}
 	storage := NewMemoryStorage()
 	c := &Config{
@@ -616,27 +590,44 @@ func TestNodeStart(t *testing.T) {
 		MaxInflightMsgs: 256,
 	}
-	n := StartNode(c, []Peer{{ID: 1}})
-	defer n.Stop()
-	g := <-n.Ready()
-	if !reflect.DeepEqual(g, wants[0]) {
-		t.Fatalf("#%d: g = %+v,\n w %+v", 1, g, wants[0])
-	} else {
-		storage.Append(g.Entries)
-		n.Advance()
+	ctx, cancel, n := newNodeTestHarness(context.Background(), t, c, Peer{ID: 1})
+	defer cancel()
+
+	{
+		rd := <-n.Ready()
+		if !reflect.DeepEqual(rd, wants[0]) {
+			t.Fatalf("#1: rd = %+v,\n w %+v", rd, wants[0])
+		}
+		storage.Append(rd.Entries)
+		n.Advance()
 	}
 	if err := n.Campaign(ctx); err != nil {
 		t.Fatal(err)
 	}
-	rd := <-n.Ready()
-	storage.Append(rd.Entries)
-	n.Advance()
+
+	{
+		rd := <-n.Ready()
+		storage.Append(rd.Entries)
+		n.Advance()
+	}
 	n.Propose(ctx, []byte("foo"))
-	if g2 := <-n.Ready(); !reflect.DeepEqual(g2, wants[1]) {
-		t.Errorf("#%d: g = %+v,\n w %+v", 2, g2, wants[1])
-	} else {
-		storage.Append(g2.Entries)
-		n.Advance()
+	{
+		rd := <-n.Ready()
+		if !reflect.DeepEqual(rd, wants[1]) {
+			t.Errorf("#2: rd = %+v,\n w %+v", rd, wants[1])
+		}
+		storage.Append(rd.Entries)
+		n.Advance()
+	}
+
+	{
+		rd := <-n.Ready()
+		if !reflect.DeepEqual(rd, wants[2]) {
+			t.Errorf("#3: rd = %+v,\n w %+v", rd, wants[2])
+		}
+		storage.Append(rd.Entries)
+		n.Advance()
 	}
@ -740,10 +731,7 @@ func TestNodeRestartFromSnapshot(t *testing.T) {
} }
func TestNodeAdvance(t *testing.T) { func TestNodeAdvance(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) storage := newTestMemoryStorage(withPeers(1))
defer cancel()
storage := NewMemoryStorage()
c := &Config{ c := &Config{
ID: 1, ID: 1,
ElectionTick: 10, ElectionTick: 10,
@ -752,21 +740,17 @@ func TestNodeAdvance(t *testing.T) {
MaxSizePerMsg: noLimit, MaxSizePerMsg: noLimit,
MaxInflightMsgs: 256, MaxInflightMsgs: 256,
} }
n := StartNode(c, []Peer{{ID: 1}}) ctx, cancel, n := newNodeTestHarness(context.Background(), t, c)
defer n.Stop() defer cancel()
rd := <-n.Ready()
n.Campaign(ctx)
rd := readyWithTimeout(n)
// Commit empty entry.
storage.Append(rd.Entries) storage.Append(rd.Entries)
n.Advance() n.Advance()
n.Campaign(ctx)
<-n.Ready()
n.Propose(ctx, []byte("foo")) n.Propose(ctx, []byte("foo"))
select { rd = readyWithTimeout(n)
case rd = <-n.Ready():
t.Fatalf("unexpected Ready before Advance: %+v", rd)
case <-time.After(time.Millisecond):
}
storage.Append(rd.Entries) storage.Append(rd.Entries)
n.Advance() n.Advance()
select { select {
@ -911,15 +895,14 @@ func TestCommitPagination(t *testing.T) {
s := newTestMemoryStorage(withPeers(1)) s := newTestMemoryStorage(withPeers(1))
cfg := newTestConfig(1, 10, 1, s) cfg := newTestConfig(1, 10, 1, s)
cfg.MaxCommittedSizePerReady = 2048 cfg.MaxCommittedSizePerReady = 2048
rn, err := NewRawNode(cfg) ctx, cancel, n := newNodeTestHarness(context.Background(), t, cfg)
if err != nil { defer cancel()
t.Fatal(err) n.Campaign(ctx)
}
n := newNode(rn)
go n.run()
n.Campaign(context.TODO())
rd := readyWithTimeout(&n) rd := readyWithTimeout(n)
s.Append(rd.Entries)
n.Advance()
rd = readyWithTimeout(n)
if len(rd.CommittedEntries) != 1 { if len(rd.CommittedEntries) != 1 {
t.Fatalf("expected 1 (empty) entry, got %d", len(rd.CommittedEntries)) t.Fatalf("expected 1 (empty) entry, got %d", len(rd.CommittedEntries))
} }
@ -928,25 +911,32 @@ func TestCommitPagination(t *testing.T) {
blob := []byte(strings.Repeat("a", 1000)) blob := []byte(strings.Repeat("a", 1000))
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
if err := n.Propose(context.TODO(), blob); err != nil { if err := n.Propose(ctx, blob); err != nil {
t.Fatal(err) t.Fatal(err)
} }
} }
// First the three proposals have to be appended.
rd = readyWithTimeout(n)
if len(rd.Entries) != 3 {
t.Fatal("expected to see three entries")
}
s.Append(rd.Entries)
n.Advance()
// The 3 proposals will commit in two batches. // The 3 proposals will commit in two batches.
rd = readyWithTimeout(&n) rd = readyWithTimeout(n)
if len(rd.CommittedEntries) != 2 { if len(rd.CommittedEntries) != 2 {
t.Fatalf("expected 2 entries in first batch, got %d", len(rd.CommittedEntries)) t.Fatalf("expected 2 entries in first batch, got %d", len(rd.CommittedEntries))
} }
s.Append(rd.Entries) s.Append(rd.Entries)
n.Advance() n.Advance()
rd = readyWithTimeout(&n) rd = readyWithTimeout(n)
if len(rd.CommittedEntries) != 1 { if len(rd.CommittedEntries) != 1 {
t.Fatalf("expected 1 entry in second batch, got %d", len(rd.CommittedEntries)) t.Fatalf("expected 1 entry in second batch, got %d", len(rd.CommittedEntries))
} }
s.Append(rd.Entries) s.Append(rd.Entries)
n.Advance() n.Advance()
n.Stop()
} }
type ignoreSizeHintMemStorage struct { type ignoreSizeHintMemStorage struct {

raft/node_util_test.go (new file, 110 lines)

@ -0,0 +1,110 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package raft
import (
"context"
"fmt"
"testing"
"time"
)
type nodeTestHarness struct {
*node
t *testing.T
}
func (l *nodeTestHarness) Debug(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Debugf(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Error(v ...interface{}) {
l.t.Error(v...)
}
func (l *nodeTestHarness) Errorf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
}
func (l *nodeTestHarness) Info(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Infof(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Warning(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Warningf(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Fatal(v ...interface{}) {
l.t.Error(v...)
panic(v)
}
func (l *nodeTestHarness) Fatalf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
panic(fmt.Sprintf(format, v...))
}
func (l *nodeTestHarness) Panic(v ...interface{}) {
l.t.Log(v...)
panic(v)
}
func (l *nodeTestHarness) Panicf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
panic(fmt.Sprintf(format, v...))
}
func newNodeTestHarness(ctx context.Context, t *testing.T, cfg *Config, peers ...Peer) (_ context.Context, cancel func(), _ *nodeTestHarness) {
// Wrap context in a 10s timeout to make tests more robust. Otherwise,
// it's likely that deadlock will occur unless Node behaves exactly as
// expected - when you expect a Ready and start waiting on the channel
// but no Ready ever shows up, for example.
ctx, cancel = context.WithTimeout(ctx, 10*time.Second)
var n *node
if len(peers) > 0 {
n = setupNode(cfg, peers)
} else {
rn, err := NewRawNode(cfg)
if err != nil {
t.Fatal(err)
}
nn := newNode(rn)
n = &nn
}
go func() {
defer func() {
if r := recover(); r != nil {
t.Error(r)
}
}()
defer cancel()
defer n.Stop()
n.run()
}()
t.Cleanup(n.Stop)
return ctx, cancel, &nodeTestHarness{node: n, t: t}
}
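For orientation, a sketch of how a test can drive this harness, mirroring the pattern the rewritten tests elsewhere in this diff use; the test name is hypothetical, and the helpers (newTestMemoryStorage, newTestConfig, readyWithTimeout, DescribeReady) are the ones appearing in this PR:

func TestHarnessSketch(t *testing.T) {
	s := newTestMemoryStorage(withPeers(1))
	ctx, cancel, n := newNodeTestHarness(context.Background(), t, newTestConfig(1, 10, 1, s))
	defer cancel()

	n.Campaign(ctx)
	// The first Ready carries the appended-but-not-yet-committed entries; persist them.
	rd := readyWithTimeout(n)
	s.Append(rd.Entries)
	n.Advance()
	// Only a later Ready hands the same entries back as CommittedEntries.
	rd = readyWithTimeout(n)
	t.Log(DescribeReady(rd, nil))
}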


@@ -572,6 +572,19 @@ func (r *raft) advance(rd Ready) {
 	if len(rd.Entries) > 0 {
 		e := rd.Entries[len(rd.Entries)-1]
+		if r.id == r.lead {
+			// The leader needs to self-ack the entries just appended (since it doesn't
+			// send an MsgApp to itself). This is roughly equivalent to:
+			//
+			//  r.prs.Progress[r.id].MaybeUpdate(e.Index)
+			//  if r.maybeCommit() {
+			//  	r.bcastAppend()
+			//  }
+			_ = r.Step(pb.Message{From: r.id, Type: pb.MsgAppResp, Index: e.Index})
+		}
+		// NB: it's important for performance that this call happens after
+		// r.Step above on the leader. This is because r.Step can then use
+		// a fast-path for `r.raftLog.term()`.
 		r.raftLog.stableTo(e.Index, e.Term)
 	}
 	if !IsEmptySnap(rd.Snapshot) {
@@ -634,10 +647,7 @@ func (r *raft) appendEntry(es ...pb.Entry) (accepted bool) {
 		return false
 	}
 	// use latest "last" index after truncate/append
-	li = r.raftLog.append(es...)
-	r.prs.Progress[r.id].MaybeUpdate(li)
-	// Regardless of maybeCommit's return, our caller will call bcastAppend.
-	r.maybeCommit()
+	r.raftLog.append(es...)
 	return true
 }
@@ -735,7 +745,11 @@ func (r *raft) becomeLeader() {
 	// (perhaps after having received a snapshot as a result). The leader is
 	// trivially in this state. Note that r.reset() has initialized this
 	// progress with the last index already.
-	r.prs.Progress[r.id].BecomeReplicate()
+	pr := r.prs.Progress[r.id]
+	pr.BecomeReplicate()
+	// The leader always has RecentActive == true; MsgCheckQuorum makes sure to
+	// preserve this.
+	pr.RecentActive = true
 
 	// Conservatively set the pendingConfIndex to the last index in the
 	// log. There may or may not be a pending config change, but it's
@@ -995,15 +1009,6 @@ func stepLeader(r *raft, m pb.Message) error {
 		r.bcastHeartbeat()
 		return nil
 	case pb.MsgCheckQuorum:
-		// The leader should always see itself as active. As a precaution, handle
-		// the case in which the leader isn't in the configuration any more (for
-		// example if it just removed itself).
-		//
-		// TODO(tbg): I added a TODO in removeNode, it doesn't seem that the
-		// leader steps down when removing itself. I might be missing something.
-		if pr := r.prs.Progress[r.id]; pr != nil {
-			pr.RecentActive = true
-		}
 		if !r.prs.QuorumActive() {
 			r.logger.Warningf("%x stepped down to follower since quorum is not active", r.id)
 			r.becomeFollower(r.Term, None)
@@ -1104,6 +1109,9 @@ func stepLeader(r *raft, m pb.Message) error {
 	}
 	switch m.Type {
 	case pb.MsgAppResp:
+		// NB: this code path is also hit from (*raft).advance, where the leader steps
+		// an MsgAppResp to acknowledge the appended entries in the last Ready.
 		pr.RecentActive = true
 
 		if m.Reject {
@@ -1272,7 +1280,9 @@ func stepLeader(r *raft, m pb.Message) error {
 				// replicate, or when freeTo() covers multiple messages). If
 				// we have more entries to send, send as many messages as we
 				// can (without sending empty messages for the commit index)
-				for r.maybeSendAppend(m.From, false) {
+				if r.id != m.From {
+					for r.maybeSendAppend(m.From, false) {
+					}
 				}
 				// Transfer leadership is in progress.
 				if m.From == r.leadTransferee && pr.Match == r.raftLog.lastIndex() {
@@ -1811,6 +1821,11 @@ func numOfPendingConf(ents []pb.Entry) int {
 }
 
 func releasePendingReadIndexMessages(r *raft) {
+	if len(r.pendingReadIndexMessages) == 0 {
+		// Fast path for the common case to avoid a call to storage.LastIndex()
+		// via committedEntryInCurrentTerm.
+		return
+	}
 	if !r.committedEntryInCurrentTerm() {
 		r.logger.Error("pending MsgReadIndex should be released only after first commit in current term")
 		return


@ -473,9 +473,9 @@ func TestLeaderCommitEntry(t *testing.T) {
// Reference: section 5.3 // Reference: section 5.3
func TestLeaderAcknowledgeCommit(t *testing.T) { func TestLeaderAcknowledgeCommit(t *testing.T) {
tests := []struct { tests := []struct {
size int size int
acceptors map[uint64]bool nonLeaderAcceptors map[uint64]bool
wack bool wack bool
}{ }{
{1, nil, true}, {1, nil, true},
{3, nil, false}, {3, nil, false},
@ -496,8 +496,11 @@ func TestLeaderAcknowledgeCommit(t *testing.T) {
li := r.raftLog.lastIndex() li := r.raftLog.lastIndex()
r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}})
for _, m := range r.readMessages() { rd := newReady(r, &SoftState{}, pb.HardState{})
if tt.acceptors[m.To] { s.Append(rd.Entries)
r.advance(rd) // simulate having appended entry on leader
for _, m := range rd.Messages {
if tt.nonLeaderAcceptors[m.To] {
r.Step(acceptAndReply(m)) r.Step(acceptAndReply(m))
} }
} }
@ -891,6 +894,9 @@ func TestLeaderOnlyCommitsLogFromCurrentTerm(t *testing.T) {
r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}})
r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Term: r.Term, Index: tt.index}) r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Term: r.Term, Index: tt.index})
rd := newReady(r, &SoftState{}, pb.HardState{})
storage.Append(rd.Entries)
r.advance(rd)
if r.raftLog.committed != tt.wcommit { if r.raftLog.committed != tt.wcommit {
t.Errorf("#%d: commit = %d, want %d", i, r.raftLog.committed, tt.wcommit) t.Errorf("#%d: commit = %d, want %d", i, r.raftLog.committed, tt.wcommit)
} }


@@ -29,13 +29,15 @@ import (
 
 // nextEnts returns the appliable entries and updates the applied index
 func nextEnts(r *raft, s *MemoryStorage) (ents []pb.Entry) {
-	// Transfer all unstable entries to "stable" storage.
-	s.Append(r.raftLog.unstableEntries())
-	r.raftLog.stableTo(r.raftLog.lastIndex(), r.raftLog.lastTerm())
-	ents = r.raftLog.nextEnts()
-	r.raftLog.appliedTo(r.raftLog.committed)
-	return ents
+	for {
+		rd := newReady(r, &SoftState{}, pb.HardState{})
+		s.Append(rd.Entries)
+		r.advance(rd)
+		if len(rd.Entries)+len(rd.CommittedEntries) == 0 {
+			return ents
+		}
+		ents = append(ents, rd.CommittedEntries...)
+	}
 }
func mustAppendEntry(r *raft, ents ...pb.Entry) { func mustAppendEntry(r *raft, ents ...pb.Entry) {
@ -57,21 +59,33 @@ func (r *raft) readMessages() []pb.Message {
} }
func TestProgressLeader(t *testing.T) { func TestProgressLeader(t *testing.T) {
r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) s := newTestMemoryStorage(withPeers(1, 2))
r := newTestRaft(1, 5, 1, s)
r.becomeCandidate() r.becomeCandidate()
r.becomeLeader() r.becomeLeader()
r.prs.Progress[2].BecomeReplicate() r.prs.Progress[2].BecomeReplicate()
// Send proposals to r1. The first 5 entries should be appended to the log. // Send proposals to r1. The first 5 entries should be queued in the unstable log.
propMsg := pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("foo")}}} propMsg := pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("foo")}}}
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
if pr := r.prs.Progress[r.id]; pr.State != tracker.StateReplicate || pr.Match != uint64(i+1) || pr.Next != pr.Match+1 {
t.Errorf("unexpected progress %v", pr)
}
if err := r.Step(propMsg); err != nil { if err := r.Step(propMsg); err != nil {
t.Fatalf("proposal resulted in error: %v", err) t.Fatalf("proposal resulted in error: %v", err)
} }
} }
if m := r.prs.Progress[1].Match; m != 0 {
t.Fatalf("expected zero match, got %d", m)
}
rd := newReady(r, &SoftState{}, pb.HardState{})
if len(rd.Entries) != 6 || len(rd.Entries[0].Data) > 0 || string(rd.Entries[5].Data) != "foo" {
t.Fatalf("unexpected Entries: %s", DescribeReady(rd, nil))
}
r.advance(rd)
if m := r.prs.Progress[1].Match; m != 6 {
t.Fatalf("unexpected Match %d", m)
}
if m := r.prs.Progress[1].Next; m != 7 {
t.Fatalf("unexpected Next %d", m)
}
} }
// TestProgressResumeByHeartbeatResp ensures raft.heartbeat reset progress.paused by heartbeat response. // TestProgressResumeByHeartbeatResp ensures raft.heartbeat reset progress.paused by heartbeat response.
@ -663,10 +677,12 @@ func TestLogReplication(t *testing.T) {
// TestLearnerLogReplication tests that a learner can receive entries from the leader. // TestLearnerLogReplication tests that a learner can receive entries from the leader.
func TestLearnerLogReplication(t *testing.T) { func TestLearnerLogReplication(t *testing.T) {
n1 := newTestLearnerRaft(1, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) s1 := newTestMemoryStorage(withPeers(1), withLearners(2))
n1 := newTestLearnerRaft(1, 10, 1, s1)
n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2)))
nt := newNetwork(n1, n2) nt := newNetwork(n1, n2)
nt.t = t
n1.becomeFollower(1, None) n1.becomeFollower(1, None)
n2.becomeFollower(1, None) n2.becomeFollower(1, None)
@ -686,12 +702,23 @@ func TestLearnerLogReplication(t *testing.T) {
t.Error("peer 2 state: not learner, want yes") t.Error("peer 2 state: not learner, want yes")
} }
nextCommitted := n1.raftLog.committed + 1 nextCommitted := uint64(2)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) {
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}})
rd := newReady(n1, &SoftState{}, pb.HardState{})
nt.send(rd.Messages...)
s1.Append(rd.Entries)
n1.advance(rd)
}
if n1.raftLog.committed != nextCommitted { if n1.raftLog.committed != nextCommitted {
t.Errorf("peer 1 wants committed to %d, but still %d", nextCommitted, n1.raftLog.committed) t.Errorf("peer 1 wants committed to %d, but still %d", nextCommitted, n1.raftLog.committed)
} }
{
rd := newReady(n1, &SoftState{}, pb.HardState{})
nt.send(rd.Messages...)
}
if n1.raftLog.committed != n2.raftLog.committed { if n1.raftLog.committed != n2.raftLog.committed {
t.Errorf("peer 2 wants committed to %d, but still %d", n1.raftLog.committed, n2.raftLog.committed) t.Errorf("peer 2 wants committed to %d, but still %d", n1.raftLog.committed, n2.raftLog.committed)
} }
@ -703,11 +730,18 @@ func TestLearnerLogReplication(t *testing.T) {
} }
func TestSingleNodeCommit(t *testing.T) { func TestSingleNodeCommit(t *testing.T) {
tt := newNetwork(nil) s := newTestMemoryStorage(withPeers(1))
cfg := newTestConfig(1, 10, 1, s)
r := newRaft(cfg)
tt := newNetwork(r)
tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}})
tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}})
rd := newReady(r, &SoftState{}, pb.HardState{})
s.Append(rd.Entries)
r.advance(rd)
sm := tt.peers[1].(*raft) sm := tt.peers[1].(*raft)
if sm.raftLog.committed != 3 { if sm.raftLog.committed != 3 {
t.Errorf("committed = %d, want %d", sm.raftLog.committed, 3) t.Errorf("committed = %d, want %d", sm.raftLog.committed, 3)
@ -792,9 +826,12 @@ func TestCommitWithoutNewTermEntry(t *testing.T) {
} }
func TestDuelingCandidates(t *testing.T) { func TestDuelingCandidates(t *testing.T) {
a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) s1 := newTestMemoryStorage(withPeers(1, 2, 3))
b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) s2 := newTestMemoryStorage(withPeers(1, 2, 3))
c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) s3 := newTestMemoryStorage(withPeers(1, 2, 3))
a := newTestRaft(1, 10, 1, s1)
b := newTestRaft(2, 10, 1, s2)
c := newTestRaft(3, 10, 1, s3)
nt := newNetwork(a, b, c) nt := newNetwork(a, b, c)
nt.cut(1, 3) nt.cut(1, 3)
@ -820,21 +857,19 @@ func TestDuelingCandidates(t *testing.T) {
// we expect it to disrupt the leader 1 since it has a higher term // we expect it to disrupt the leader 1 since it has a higher term
// 3 will be follower again since both 1 and 2 rejects its vote request since 3 does not have a long enough log // 3 will be follower again since both 1 and 2 rejects its vote request since 3 does not have a long enough log
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
if sm.state != StateFollower {
wlog := &raftLog{ t.Errorf("state = %s, want %s", sm.state, StateFollower)
storage: &MemoryStorage{ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}}},
committed: 1,
unstable: unstable{offset: 2},
} }
tests := []struct { tests := []struct {
sm *raft sm *raft
state StateType state StateType
term uint64 term uint64
raftLog *raftLog lastIndex uint64
}{ }{
{a, StateFollower, 2, wlog}, {a, StateFollower, 2, 1},
{b, StateFollower, 2, wlog}, {b, StateFollower, 2, 1},
{c, StateFollower, 2, newLog(NewMemoryStorage(), raftLogger)}, {c, StateFollower, 2, 0},
} }
for i, tt := range tests { for i, tt := range tests {
@ -844,14 +879,8 @@ func TestDuelingCandidates(t *testing.T) {
if g := tt.sm.Term; g != tt.term { if g := tt.sm.Term; g != tt.term {
t.Errorf("#%d: term = %d, want %d", i, g, tt.term) t.Errorf("#%d: term = %d, want %d", i, g, tt.term)
} }
base := ltoa(tt.raftLog) if exp, act := tt.lastIndex, tt.sm.raftLog.lastIndex(); exp != act {
if sm, ok := nt.peers[1+uint64(i)].(*raft); ok { t.Errorf("#%d: last index exp = %d, act = %d", i, exp, act)
l := ltoa(sm.raftLog)
if g := diffu(base, l); g != "" {
t.Errorf("#%d: diff:\n%s", i, g)
}
} else {
t.Logf("#%d: empty log", i)
} }
} }
} }
@ -868,6 +897,7 @@ func TestDuelingPreCandidates(t *testing.T) {
c := newRaft(cfgC) c := newRaft(cfgC)
nt := newNetwork(a, b, c) nt := newNetwork(a, b, c)
nt.t = t
nt.cut(1, 3) nt.cut(1, 3)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
@ -891,20 +921,15 @@ func TestDuelingPreCandidates(t *testing.T) {
// With PreVote, it does not disrupt the leader. // With PreVote, it does not disrupt the leader.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
wlog := &raftLog{
storage: &MemoryStorage{ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}}},
committed: 1,
unstable: unstable{offset: 2},
}
tests := []struct { tests := []struct {
sm *raft sm *raft
state StateType state StateType
term uint64 term uint64
raftLog *raftLog lastIndex uint64
}{ }{
{a, StateLeader, 1, wlog}, {a, StateLeader, 1, 1},
{b, StateFollower, 1, wlog}, {b, StateFollower, 1, 1},
{c, StateFollower, 1, newLog(NewMemoryStorage(), raftLogger)}, {c, StateFollower, 1, 0},
} }
for i, tt := range tests { for i, tt := range tests {
@ -914,14 +939,8 @@ func TestDuelingPreCandidates(t *testing.T) {
if g := tt.sm.Term; g != tt.term { if g := tt.sm.Term; g != tt.term {
t.Errorf("#%d: term = %d, want %d", i, g, tt.term) t.Errorf("#%d: term = %d, want %d", i, g, tt.term)
} }
base := ltoa(tt.raftLog) if exp, act := tt.lastIndex, tt.sm.raftLog.lastIndex(); exp != act {
if sm, ok := nt.peers[1+uint64(i)].(*raft); ok { t.Errorf("#%d: last index is %d, exp %d", i, act, exp)
l := ltoa(sm.raftLog)
if g := diffu(base, l); g != "" {
t.Errorf("#%d: diff:\n%s", i, g)
}
} else {
t.Logf("#%d: empty log", i)
} }
} }
} }
@ -1058,6 +1077,7 @@ func TestProposal(t *testing.T) {
// promote 1 to become leader // promote 1 to become leader
send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}}) send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}})
r := tt.network.peers[1].(*raft)
wantLog := newLog(NewMemoryStorage(), raftLogger) wantLog := newLog(NewMemoryStorage(), raftLogger)
if tt.success { if tt.success {
@ -1065,8 +1085,8 @@ func TestProposal(t *testing.T) {
storage: &MemoryStorage{ storage: &MemoryStorage{
ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}}, ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}},
}, },
unstable: unstable{offset: 3}, unstable: unstable{offset: 3},
committed: 2} }
} }
base := ltoa(wantLog) base := ltoa(wantLog)
for i, p := range tt.peers { for i, p := range tt.peers {
@ -1079,8 +1099,7 @@ func TestProposal(t *testing.T) {
t.Logf("#%d: peer %d empty log", j, i) t.Logf("#%d: peer %d empty log", j, i)
} }
} }
sm := tt.network.peers[1].(*raft) if g := r.Term; g != 1 {
if g := sm.Term; g != 1 {
t.Errorf("#%d: term = %d, want %d", j, g, 1) t.Errorf("#%d: term = %d, want %d", j, g, 1)
} }
} }
@ -1405,14 +1424,14 @@ func TestRaftFreesReadOnlyMem(t *testing.T) {
// TestMsgAppRespWaitReset verifies the resume behavior of a leader // TestMsgAppRespWaitReset verifies the resume behavior of a leader
// MsgAppResp. // MsgAppResp.
func TestMsgAppRespWaitReset(t *testing.T) { func TestMsgAppRespWaitReset(t *testing.T) {
sm := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2, 3))) s := newTestMemoryStorage(withPeers(1, 2, 3))
sm := newTestRaft(1, 5, 1, s)
sm.becomeCandidate() sm.becomeCandidate()
sm.becomeLeader() sm.becomeLeader()
// The new leader has just emitted a new Term 4 entry; consume those messages // Run n1 which includes sending a message like the below
// from the outgoing queue. // one to n2, but also appending to its own log.
sm.bcastAppend() nextEnts(sm, s)
sm.readMessages()
// Node 2 acks the first entry, making it committed. // Node 2 acks the first entry, making it committed.
sm.Step(pb.Message{ sm.Step(pb.Message{
@ -2228,7 +2247,8 @@ func TestReadOnlyOptionSafe(t *testing.T) {
} }
func TestReadOnlyWithLearner(t *testing.T) { func TestReadOnlyWithLearner(t *testing.T) {
a := newTestLearnerRaft(1, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) s := newTestMemoryStorage(withPeers(1), withLearners(2))
a := newTestLearnerRaft(1, 10, 1, s)
b := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) b := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2)))
nt := newNetwork(a, b) nt := newNetwork(a, b)
@ -2258,6 +2278,7 @@ func TestReadOnlyWithLearner(t *testing.T) {
for i, tt := range tests { for i, tt := range tests {
for j := 0; j < tt.proposals; j++ { for j := 0; j < tt.proposals; j++ {
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}})
nextEnts(a, s) // append the entries on the leader
} }
nt.send(pb.Message{From: tt.sm.id, To: tt.sm.id, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: tt.wctx}}}) nt.send(pb.Message{From: tt.sm.id, To: tt.sm.id, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: tt.wctx}}})
@ -3634,13 +3655,17 @@ func TestLeaderTransferTimeout(t *testing.T) {
} }
func TestLeaderTransferIgnoreProposal(t *testing.T) { func TestLeaderTransferIgnoreProposal(t *testing.T) {
nt := newNetwork(nil, nil, nil) s := newTestMemoryStorage(withPeers(1, 2, 3))
r := newTestRaft(1, 10, 1, s)
nt := newNetwork(r, nil, nil)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
nt.isolate(3) nt.isolate(3)
lead := nt.peers[1].(*raft) lead := nt.peers[1].(*raft)
nextEnts(r, s) // handle empty entry
// Transfer leadership to isolated node to let transfer pending, then send proposal. // Transfer leadership to isolated node to let transfer pending, then send proposal.
nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader})
if lead.leadTransferee != 3 { if lead.leadTransferee != 3 {
@ -4630,6 +4655,8 @@ func votedWithConfig(configFunc func(*Config), vote, term uint64) *raft {
} }
type network struct { type network struct {
t *testing.T // optional
peers map[uint64]stateMachine peers map[uint64]stateMachine
storage map[uint64]*MemoryStorage storage map[uint64]*MemoryStorage
dropm map[connem]float64 dropm map[connem]float64
@ -4713,6 +4740,9 @@ func (nw *network) send(msgs ...pb.Message) {
for len(msgs) > 0 { for len(msgs) > 0 {
m := msgs[0] m := msgs[0]
p := nw.peers[m.To] p := nw.peers[m.To]
if nw.t != nil {
nw.t.Log(DescribeMessage(m, nil))
}
p.Step(m) p.Step(m)
msgs = append(msgs[1:], nw.filter(p.readMessages())...) msgs = append(msgs[1:], nw.filter(p.readMessages())...)
} }


@ -388,122 +388,125 @@ func TestRawNodeJointAutoLeave(t *testing.T) {
} }
exp2Cs := pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}} exp2Cs := pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}
t.Run("", func(t *testing.T) { s := newTestMemoryStorage(withPeers(1))
s := newTestMemoryStorage(withPeers(1)) rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s))
rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) if err != nil {
if err != nil { t.Fatal(err)
t.Fatal(err) }
}
rawNode.Campaign() rawNode.Campaign()
proposed := false proposed := false
var ( var (
lastIndex uint64 lastIndex uint64
ccdata []byte ccdata []byte
) )
// Propose the ConfChange, wait until it applies, save the resulting // Propose the ConfChange, wait until it applies, save the resulting
// ConfState. // ConfState.
var cs *pb.ConfState var cs *pb.ConfState
for cs == nil { for cs == nil {
rd := rawNode.Ready() rd := rawNode.Ready()
s.Append(rd.Entries)
for _, ent := range rd.CommittedEntries {
var cc pb.ConfChangeI
if ent.Type == pb.EntryConfChangeV2 {
var ccc pb.ConfChangeV2
if err = ccc.Unmarshal(ent.Data); err != nil {
t.Fatal(err)
}
cc = &ccc
}
if cc != nil {
// Force it step down.
rawNode.Step(pb.Message{Type: pb.MsgHeartbeatResp, From: 1, Term: rawNode.raft.Term + 1})
cs = rawNode.ApplyConfChange(cc)
}
}
rawNode.Advance(rd)
// Once we are the leader, propose a command and a ConfChange.
if !proposed && rd.SoftState.Lead == rawNode.raft.id {
if err = rawNode.Propose([]byte("somedata")); err != nil {
t.Fatal(err)
}
ccdata, err = testCc.Marshal()
if err != nil {
t.Fatal(err)
}
rawNode.ProposeConfChange(testCc)
proposed = true
}
}
// Check that the last index is exactly the conf change we put in,
// down to the bits. Note that this comes from the Storage, which
// will not reflect any unstable entries that we'll only be presented
// with in the next Ready.
lastIndex, err = s.LastIndex()
if err != nil {
t.Fatal(err)
}
entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit)
if err != nil {
t.Fatal(err)
}
if len(entries) != 2 {
t.Fatalf("len(entries) = %d, want %d", len(entries), 2)
}
if !bytes.Equal(entries[0].Data, []byte("somedata")) {
t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata"))
}
if entries[1].Type != pb.EntryConfChangeV2 {
t.Fatalf("type = %v, want %v", entries[1].Type, pb.EntryConfChangeV2)
}
if !bytes.Equal(entries[1].Data, ccdata) {
t.Errorf("data = %v, want %v", entries[1].Data, ccdata)
}
if !reflect.DeepEqual(&expCs, cs) {
t.Fatalf("exp:\n%+v\nact:\n%+v", expCs, cs)
}
if rawNode.raft.pendingConfIndex != 0 {
t.Fatalf("pendingConfIndex: expected %d, got %d", 0, rawNode.raft.pendingConfIndex)
}
// Move the RawNode along. It should not leave joint because it's follower.
rd := rawNode.readyWithoutAccept()
// Check that the right ConfChange comes out.
if len(rd.Entries) != 0 {
t.Fatalf("expected zero entry, got %+v", rd)
}
// Make it leader again. It should leave joint automatically after moving apply index.
rawNode.Campaign()
rd = rawNode.Ready()
s.Append(rd.Entries) s.Append(rd.Entries)
for _, ent := range rd.CommittedEntries {
var cc pb.ConfChangeI
if ent.Type == pb.EntryConfChangeV2 {
var ccc pb.ConfChangeV2
if err = ccc.Unmarshal(ent.Data); err != nil {
t.Fatal(err)
}
cc = &ccc
}
if cc != nil {
// Force it step down.
rawNode.Step(pb.Message{Type: pb.MsgHeartbeatResp, From: 1, Term: rawNode.raft.Term + 1})
cs = rawNode.ApplyConfChange(cc)
}
}
rawNode.Advance(rd) rawNode.Advance(rd)
rd = rawNode.Ready() // Once we are the leader, propose a command and a ConfChange.
s.Append(rd.Entries) if !proposed && rd.SoftState.Lead == rawNode.raft.id {
if err = rawNode.Propose([]byte("somedata")); err != nil {
t.Fatal(err)
}
ccdata, err = testCc.Marshal()
if err != nil {
t.Fatal(err)
}
rawNode.ProposeConfChange(testCc)
proposed = true
}
}
// Check that the right ConfChange comes out. // Check that the last index is exactly the conf change we put in,
if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 { // down to the bits. Note that this comes from the Storage, which
t.Fatalf("expected exactly one more entry, got %+v", rd) // will not reflect any unstable entries that we'll only be presented
} // with in the next Ready.
var cc pb.ConfChangeV2 lastIndex, err = s.LastIndex()
if err := cc.Unmarshal(rd.Entries[0].Data); err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: nil}) {
t.Fatalf("expected zero ConfChangeV2, got %+v", cc) entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit)
} if err != nil {
// Lie and pretend the ConfChange applied. It won't do so because now t.Fatal(err)
// we require the joint quorum and we're only running one node. }
cs = rawNode.ApplyConfChange(cc) if len(entries) != 2 {
if exp := exp2Cs; !reflect.DeepEqual(&exp, cs) { t.Fatalf("len(entries) = %d, want %d", len(entries), 2)
t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) }
} if !bytes.Equal(entries[0].Data, []byte("somedata")) {
}) t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata"))
}
if entries[1].Type != pb.EntryConfChangeV2 {
t.Fatalf("type = %v, want %v", entries[1].Type, pb.EntryConfChangeV2)
}
if !bytes.Equal(entries[1].Data, ccdata) {
t.Errorf("data = %v, want %v", entries[1].Data, ccdata)
}
if !reflect.DeepEqual(&expCs, cs) {
t.Fatalf("exp:\n%+v\nact:\n%+v", expCs, cs)
}
if rawNode.raft.pendingConfIndex != 0 {
t.Fatalf("pendingConfIndex: expected %d, got %d", 0, rawNode.raft.pendingConfIndex)
}
// Move the RawNode along. It should not leave joint because it's follower.
rd := rawNode.readyWithoutAccept()
// Check that the right ConfChange comes out.
if len(rd.Entries) != 0 {
t.Fatalf("expected zero entry, got %+v", rd)
}
// Make it leader again. It should leave joint automatically after moving apply index.
rawNode.Campaign()
rd = rawNode.Ready()
t.Log(DescribeReady(rd, nil))
s.Append(rd.Entries)
rawNode.Advance(rd)
rd = rawNode.Ready()
t.Log(DescribeReady(rd, nil))
s.Append(rd.Entries)
rawNode.Advance(rd)
rd = rawNode.Ready()
t.Log(DescribeReady(rd, nil))
s.Append(rd.Entries)
// Check that the right ConfChange comes out.
if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 {
t.Fatalf("expected exactly one more entry, got %+v", rd)
}
var cc pb.ConfChangeV2
if err := cc.Unmarshal(rd.Entries[0].Data); err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: nil}) {
t.Fatalf("expected zero ConfChangeV2, got %+v", cc)
}
// Lie and pretend the ConfChange applied. It won't do so because now
// we require the joint quorum and we're only running one node.
cs = rawNode.ApplyConfChange(cc)
if exp := exp2Cs; !reflect.DeepEqual(&exp, cs) {
t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs)
}
} }
// TestRawNodeProposeAddDuplicateNode ensures that two proposes to add the same node should // TestRawNodeProposeAddDuplicateNode ensures that two proposes to add the same node should
@@ -656,18 +659,16 @@ func TestRawNodeReadIndex(t *testing.T) {
 // requires the application to bootstrap the state, i.e. it does not accept peers
 // and will not create faux configuration change entries.
 func TestRawNodeStart(t *testing.T) {
+	entries := []pb.Entry{
+		{Term: 1, Index: 2, Data: nil},           // empty entry
+		{Term: 1, Index: 3, Data: []byte("foo")}, // empty entry
+	}
 	want := Ready{
 		SoftState: &SoftState{Lead: 1, RaftState: StateLeader},
 		HardState: pb.HardState{Term: 1, Commit: 3, Vote: 1},
-		Entries: []pb.Entry{
-			{Term: 1, Index: 2, Data: nil},           // empty entry
-			{Term: 1, Index: 3, Data: []byte("foo")}, // empty entry
-		},
-		CommittedEntries: []pb.Entry{
-			{Term: 1, Index: 2, Data: nil},           // empty entry
-			{Term: 1, Index: 3, Data: []byte("foo")}, // empty entry
-		},
-		MustSync: true,
+		Entries:          nil, // emitted & checked in intermediate Ready cycle
+		CommittedEntries: entries,
+		MustSync:         false, // since we're only applying, not appending
 	}
 
 	storage := NewMemoryStorage()
@ -747,9 +748,24 @@ func TestRawNodeStart(t *testing.T) {
t.Fatal("expected a Ready") t.Fatal("expected a Ready")
} }
rd := rawNode.Ready() rd := rawNode.Ready()
if !reflect.DeepEqual(entries, rd.Entries) {
t.Fatalf("expected to see entries\n%s, not\n%s", DescribeEntries(entries, nil), DescribeEntries(rd.Entries, nil))
}
storage.Append(rd.Entries) storage.Append(rd.Entries)
rawNode.Advance(rd) rawNode.Advance(rd)
if !rawNode.HasReady() {
t.Fatal("expected a Ready")
}
rd = rawNode.Ready()
if len(rd.Entries) != 0 {
t.Fatalf("unexpected entries: %s", DescribeEntries(rd.Entries, nil))
}
if rd.MustSync {
t.Fatalf("should not need to sync")
}
rawNode.Advance(rd)
rd.SoftState, want.SoftState = nil, nil rd.SoftState, want.SoftState = nil, nil
if !reflect.DeepEqual(rd, want) { if !reflect.DeepEqual(rd, want) {
@ -868,17 +884,17 @@ func TestRawNodeStatus(t *testing.T) {
// TestNodeCommitPaginationAfterRestart. The anomaly here was even worse as the // TestNodeCommitPaginationAfterRestart. The anomaly here was even worse as the
// Raft group would forget to apply entries: // Raft group would forget to apply entries:
// //
// - node learns that index 11 is committed // - node learns that index 11 is committed
// - nextEnts returns index 1..10 in CommittedEntries (but index 10 already // - nextEnts returns index 1..10 in CommittedEntries (but index 10 already
// exceeds maxBytes), which isn't noticed internally by Raft // exceeds maxBytes), which isn't noticed internally by Raft
// - Commit index gets bumped to 10 // - Commit index gets bumped to 10
// - the node persists the HardState, but crashes before applying the entries // - the node persists the HardState, but crashes before applying the entries
// - upon restart, the storage returns the same entries, but `slice` takes a // - upon restart, the storage returns the same entries, but `slice` takes a
// different code path and removes the last entry. // different code path and removes the last entry.
// - Raft does not emit a HardState, but when the app calls Advance(), it bumps // - Raft does not emit a HardState, but when the app calls Advance(), it bumps
// its internal applied index cursor to 10 (when it should be 9) // its internal applied index cursor to 10 (when it should be 9)
// - the next Ready asks the app to apply index 11 (omitting index 10), losing a // - the next Ready asks the app to apply index 11 (omitting index 10), losing a
// write. // write.
func TestRawNodeCommitPaginationAfterRestart(t *testing.T) { func TestRawNodeCommitPaginationAfterRestart(t *testing.T) {
s := &ignoreSizeHintMemStorage{ s := &ignoreSizeHintMemStorage{
MemoryStorage: newTestMemoryStorage(withPeers(1)), MemoryStorage: newTestMemoryStorage(withPeers(1)),
@ -952,6 +968,7 @@ func TestRawNodeBoundedLogGrowthWithPartition(t *testing.T) {
data := []byte("testdata") data := []byte("testdata")
testEntry := pb.Entry{Data: data} testEntry := pb.Entry{Data: data}
maxEntrySize := uint64(maxEntries * PayloadSize(testEntry)) maxEntrySize := uint64(maxEntries * PayloadSize(testEntry))
t.Log("maxEntrySize", maxEntrySize)
s := newTestMemoryStorage(withPeers(1)) s := newTestMemoryStorage(withPeers(1))
cfg := newTestConfig(1, 10, 1, s) cfg := newTestConfig(1, 10, 1, s)
@ -960,20 +977,16 @@ func TestRawNodeBoundedLogGrowthWithPartition(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
rd := rawNode.Ready()
s.Append(rd.Entries)
rawNode.Advance(rd)
// Become the leader. // Become the leader and apply empty entry.
rawNode.Campaign() rawNode.Campaign()
for { for {
rd = rawNode.Ready() rd := rawNode.Ready()
s.Append(rd.Entries) s.Append(rd.Entries)
if rd.SoftState.Lead == rawNode.raft.id { rawNode.Advance(rd)
rawNode.Advance(rd) if len(rd.CommittedEntries) > 0 {
break break
} }
rawNode.Advance(rd)
} }
// Simulate a network partition while we make our proposals by never // Simulate a network partition while we make our proposals by never
@ -995,12 +1008,25 @@ func TestRawNodeBoundedLogGrowthWithPartition(t *testing.T) {
// Recover from the partition. The uncommitted tail of the Raft log should // Recover from the partition. The uncommitted tail of the Raft log should
// disappear as entries are committed. // disappear as entries are committed.
rd = rawNode.Ready() rd := rawNode.Ready()
if len(rd.CommittedEntries) != maxEntries { if len(rd.Entries) != maxEntries {
t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.CommittedEntries)) t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.Entries))
} }
s.Append(rd.Entries) s.Append(rd.Entries)
rawNode.Advance(rd) rawNode.Advance(rd)
// Entries are appended, but not applied.
checkUncommitted(maxEntrySize)
rd = rawNode.Ready()
if len(rd.Entries) != 0 {
t.Fatalf("unexpected entries: %s", DescribeEntries(rd.Entries, nil))
}
if len(rd.CommittedEntries) != maxEntries {
t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.CommittedEntries))
}
rawNode.Advance(rd)
checkUncommitted(0) checkUncommitted(0)
} }
@ -1105,3 +1131,104 @@ func TestRawNodeConsumeReady(t *testing.T) {
t.Fatalf("expected only m2 in raft.msgs, got %+v", rn.raft.msgs) t.Fatalf("expected only m2 in raft.msgs, got %+v", rn.raft.msgs)
} }
} }
func BenchmarkRawNode(b *testing.B) {
cases := []struct {
name string
peers []uint64
}{
{
name: "single-voter",
peers: []uint64{1},
},
{
name: "two-voters",
peers: []uint64{1, 2},
},
// You can easily add more cases here.
}
for _, tc := range cases {
b.Run(tc.name, func(b *testing.B) {
benchmarkRawNodeImpl(b, tc.peers...)
})
}
}
func benchmarkRawNodeImpl(b *testing.B, peers ...uint64) {
const debug = false
s := newTestMemoryStorage(withPeers(peers...))
cfg := newTestConfig(1, 10, 1, s)
if !debug {
cfg.Logger = discardLogger // avoid distorting benchmark output
}
rn, err := NewRawNode(cfg)
if err != nil {
b.Fatal(err)
}
run := make(chan struct{}, 1)
defer close(run)
var numReady uint64
stabilize := func() (applied uint64) {
for rn.HasReady() {
numReady++
rd := rn.Ready()
if debug {
b.Log(DescribeReady(rd, nil))
}
if n := len(rd.CommittedEntries); n > 0 {
applied = rd.CommittedEntries[n-1].Index
}
s.Append(rd.Entries)
for _, m := range rd.Messages {
if m.Type == pb.MsgVote {
resp := pb.Message{To: m.From, From: m.To, Term: m.Term, Type: pb.MsgVoteResp}
if debug {
b.Log(DescribeMessage(resp, nil))
}
rn.Step(resp)
}
if m.Type == pb.MsgApp {
idx := m.Index
if n := len(m.Entries); n > 0 {
idx = m.Entries[n-1].Index
}
resp := pb.Message{To: m.From, From: m.To, Type: pb.MsgAppResp, Term: m.Term, Index: idx}
if debug {
b.Log(DescribeMessage(resp, nil))
}
rn.Step(resp)
}
}
rn.Advance(rd)
}
return applied
}
rn.Campaign()
stabilize()
if debug {
b.N = 1
}
var applied uint64
for i := 0; i < b.N; i++ {
if err := rn.Propose([]byte("foo")); err != nil {
b.Fatal(err)
}
applied = stabilize()
}
if applied < uint64(b.N) {
b.Fatalf("did not apply everything: %d < %d", applied, b.N)
}
b.ReportMetric(float64(s.callStats.firstIndex)/float64(b.N), "firstIndex/op")
b.ReportMetric(float64(s.callStats.lastIndex)/float64(b.N), "lastIndex/op")
b.ReportMetric(float64(s.callStats.term)/float64(b.N), "term/op")
b.ReportMetric(float64(numReady)/float64(b.N), "ready/op")
b.Logf("storage access stats: %+v", s.callStats)
}
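As a usage note, a benchmark like this is run with Go's standard tooling, for example `go test -bench=BenchmarkRawNode -benchmem` from the raft package directory; the firstIndex/op, lastIndex/op, term/op and ready/op columns in the output come from the ReportMetric calls above.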


@ -71,6 +71,10 @@ type Storage interface {
 Snapshot() (pb.Snapshot, error)
 }
+type inMemStorageCallStats struct {
+	initialState, firstIndex, lastIndex, entries, term, snapshot int
+}
 // MemoryStorage implements the Storage interface backed by an
 // in-memory array.
 type MemoryStorage struct {
@ -83,6 +87,8 @@ type MemoryStorage struct {
 snapshot pb.Snapshot
 // ents[i] has raft log position i+snapshot.Metadata.Index
 ents []pb.Entry
+callStats inMemStorageCallStats
 }
 // NewMemoryStorage creates an empty MemoryStorage.
@ -95,6 +101,7 @@ func NewMemoryStorage() *MemoryStorage {
 // InitialState implements the Storage interface.
 func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
+	ms.callStats.initialState++
 	return ms.hardState, ms.snapshot.Metadata.ConfState, nil
 }
@ -110,6 +117,7 @@ func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
 func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
 	ms.Lock()
 	defer ms.Unlock()
+	ms.callStats.entries++
 	offset := ms.ents[0].Index
 	if lo <= offset {
 		return nil, ErrCompacted
@ -130,6 +138,7 @@ func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
 func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
 	ms.Lock()
 	defer ms.Unlock()
+	ms.callStats.term++
 	offset := ms.ents[0].Index
 	if i < offset {
 		return 0, ErrCompacted
@ -144,6 +153,7 @@ func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
 func (ms *MemoryStorage) LastIndex() (uint64, error) {
 	ms.Lock()
 	defer ms.Unlock()
+	ms.callStats.lastIndex++
 	return ms.lastIndex(), nil
 }
@ -155,6 +165,7 @@ func (ms *MemoryStorage) lastIndex() uint64 {
 func (ms *MemoryStorage) FirstIndex() (uint64, error) {
 	ms.Lock()
 	defer ms.Unlock()
+	ms.callStats.firstIndex++
 	return ms.firstIndex(), nil
 }
@ -166,6 +177,7 @@ func (ms *MemoryStorage) firstIndex() uint64 {
 func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
 	ms.Lock()
 	defer ms.Unlock()
+	ms.callStats.snapshot++
 	return ms.snapshot, nil
 }
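
The callStats counters above are internal to MemoryStorage. As a rough sketch of the same instrumentation idea from outside the package, a caller-side wrapper can count Storage calls by embedding the interface; the package name, countingStorage type, and import paths below are assumptions for illustration, not part of this change:

	package raftexample // hypothetical package, for illustration only

	import (
		"fmt"

		"go.etcd.io/etcd/raft/v3" // assumed import path
		pb "go.etcd.io/etcd/raft/v3/raftpb"
	)

	// countingStorage wraps any raft.Storage and tallies how often each
	// interface method is hit; methods not overridden fall through to the
	// embedded Storage.
	type countingStorage struct {
		raft.Storage
		initialState, firstIndex, lastIndex, entries, term, snapshot int
	}

	func (cs *countingStorage) InitialState() (pb.HardState, pb.ConfState, error) {
		cs.initialState++
		return cs.Storage.InitialState()
	}

	func (cs *countingStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
		cs.entries++
		return cs.Storage.Entries(lo, hi, maxSize)
	}

	func (cs *countingStorage) Term(i uint64) (uint64, error) {
		cs.term++
		return cs.Storage.Term(i)
	}

	func (cs *countingStorage) LastIndex() (uint64, error) {
		cs.lastIndex++
		return cs.Storage.LastIndex()
	}

	func (cs *countingStorage) FirstIndex() (uint64, error) {
		cs.firstIndex++
		return cs.Storage.FirstIndex()
	}

	func (cs *countingStorage) Snapshot() (pb.Snapshot, error) {
		cs.snapshot++
		return cs.Storage.Snapshot()
	}

	// report prints the per-method counts, mirroring what the benchmark
	// derives from MemoryStorage's callStats.
	func (cs *countingStorage) report() {
		fmt.Printf("firstIndex=%d lastIndex=%d term=%d entries=%d\n",
			cs.firstIndex, cs.lastIndex, cs.term, cs.entries)
	}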

View File

@ -35,10 +35,13 @@ stabilize
 > 1 handling Ready
 Ready MustSync=true:
 Lead:1 State:StateLeader
-HardState Term:1 Vote:1 Commit:4
+HardState Term:1 Vote:1 Commit:2
 Entries:
 1/3 EntryNormal ""
 1/4 EntryConfChange v2
+> 1 handling Ready
+Ready MustSync=false:
+HardState Term:1 Vote:1 Commit:4
 CommittedEntries:
 1/3 EntryNormal ""
 1/4 EntryConfChange v2

View File

@ -31,19 +31,24 @@ INFO 3 switched to configuration voters=()
 INFO 3 became follower at term 0
 INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]
-# n1 immediately gets to commit & apply the conf change using only itself. We see that
-# it starts transitioning out of that joint configuration (though we will only see that
-# proposal in the next ready handling loop, when it is emitted). We also see that this
-# is using joint consensus, which it has to since we're carrying out two additions at
-# once.
+# Process n1 once, so that it can append the entry.
 process-ready 1
 ----
 Ready MustSync=true:
 Lead:1 State:StateLeader
-HardState Term:1 Vote:1 Commit:4
+HardState Term:1 Vote:1 Commit:2
 Entries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2 v3
+# Now n1 applies the conf change. We see that it starts transitioning out of that joint
+# configuration (though we will only see that proposal in the next ready handling
+# loop, when it is emitted). We also see that this is using joint consensus, which
+# it has to since we're carrying out two additions at once.
+process-ready 1
+----
+Ready MustSync=false:
+HardState Term:1 Vote:1 Commit:4
 CommittedEntries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2 v3
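
For the application, the practical consequence of this split is that the conf change is acted on only when it arrives in CommittedEntries, i.e. in the second Ready above, once the entry has been appended. A minimal sketch of that apply step follows; the package name, import paths, and error handling are assumptions for illustration, not the canonical loop from this repository:

	package raftexample // hypothetical package, for illustration only

	import (
		"go.etcd.io/etcd/raft/v3" // assumed import path
		"go.etcd.io/etcd/raft/v3/raftpb"
	)

	// applyCommitted sketches the apply side of Ready handling: entries are
	// only acted on once they show up in rd.CommittedEntries, which with this
	// change happens in a Ready issued after the entries were appended.
	func applyCommitted(rn *raft.RawNode, rd raft.Ready) {
		for _, ent := range rd.CommittedEntries {
			switch ent.Type {
			case raftpb.EntryConfChangeV2:
				var cc raftpb.ConfChangeV2
				if err := cc.Unmarshal(ent.Data); err != nil {
					panic(err)
				}
				// Returns the resulting ConfState, e.g. the joint
				// configuration entered by the v2 v3 additions above.
				_ = rn.ApplyConfChange(cc)
			case raftpb.EntryNormal:
				// ent.Data goes to the application state machine.
			}
		}
	}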

View File

@ -38,10 +38,13 @@ stabilize 1 2
 > 1 handling Ready
 Ready MustSync=true:
 Lead:1 State:StateLeader
-HardState Term:1 Vote:1 Commit:4
+HardState Term:1 Vote:1 Commit:2
 Entries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2
+> 1 handling Ready
+Ready MustSync=false:
+HardState Term:1 Vote:1 Commit:4
 CommittedEntries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2

View File

@ -36,10 +36,13 @@ stabilize
 > 1 handling Ready
 Ready MustSync=true:
 Lead:1 State:StateLeader
-HardState Term:1 Vote:1 Commit:4
+HardState Term:1 Vote:1 Commit:2
 Entries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2
+> 1 handling Ready
+Ready MustSync=false:
+HardState Term:1 Vote:1 Commit:4
 CommittedEntries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2

View File

@ -36,10 +36,13 @@ stabilize 1 2
 > 1 handling Ready
 Ready MustSync=true:
 Lead:1 State:StateLeader
-HardState Term:1 Vote:1 Commit:4
+HardState Term:1 Vote:1 Commit:2
 Entries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2
+> 1 handling Ready
+Ready MustSync=false:
+HardState Term:1 Vote:1 Commit:4
 CommittedEntries:
 1/3 EntryNormal ""
 1/4 EntryConfChangeV2 v2

30
raft/testdata/single_node.txt vendored Normal file
View File

@ -0,0 +1,30 @@
log-level info
----
ok
add-nodes 1 voters=(1) index=3
----
INFO 1 switched to configuration voters=(1)
INFO 1 became follower at term 0
INFO newRaft 1 [peers: [1], term: 0, commit: 3, applied: 3, lastindex: 3, lastterm: 1]
campaign 1
----
INFO 1 is starting a new election at term 0
INFO 1 became candidate at term 1
INFO 1 received MsgVoteResp from 1 at term 1
INFO 1 became leader at term 1
stabilize
----
> 1 handling Ready
Ready MustSync=true:
Lead:1 State:StateLeader
HardState Term:1 Vote:1 Commit:3
Entries:
1/4 EntryNormal ""
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:4
CommittedEntries:
1/4 EntryNormal ""
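
The sequence above can be reproduced with a plain RawNode driving a single voter: the leader's entry surfaces as an unstable entry first, and only the next Ready (after the append) carries it in CommittedEntries. A rough standalone sketch, with import paths and configuration values chosen purely for illustration:

	package main

	import (
		"fmt"

		"go.etcd.io/etcd/raft/v3" // assumed import path
		"go.etcd.io/etcd/raft/v3/raftpb"
	)

	func main() {
		storage := raft.NewMemoryStorage()
		// Bootstrap a single-voter configuration, mirroring
		// "add-nodes 1 voters=(1) index=3" above.
		storage.ApplySnapshot(raftpb.Snapshot{Metadata: raftpb.SnapshotMetadata{
			Index:     3,
			Term:      1,
			ConfState: raftpb.ConfState{Voters: []uint64{1}},
		}})

		rn, err := raft.NewRawNode(&raft.Config{
			ID:              1,
			ElectionTick:    10,
			HeartbeatTick:   1,
			Storage:         storage,
			MaxSizePerMsg:   1 << 20,
			MaxInflightMsgs: 256,
		})
		if err != nil {
			panic(err)
		}

		rn.Campaign() // the only voter wins immediately and appends 1/4 EntryNormal ""

		for i := 0; rn.HasReady(); i++ {
			rd := rn.Ready()
			// First Ready: unstable Entries, MustSync=true. The same indices only
			// appear as CommittedEntries in a later Ready, after this append.
			storage.Append(rd.Entries)
			fmt.Printf("ready %d: entries=%d committed=%d mustSync=%t\n",
				i, len(rd.Entries), len(rd.CommittedEntries), rd.MustSync)
			rn.Advance(rd)
		}
	}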

View File

@ -41,7 +41,7 @@ ok
 status 1
 ----
-1: StateReplicate match=11 next=12 inactive
+1: StateReplicate match=11 next=12
 2: StateReplicate match=11 next=12
 3: StateProbe match=0 next=11 paused inactive
@ -95,7 +95,7 @@ stabilize 1
 status 1
 ----
-1: StateReplicate match=11 next=12 inactive
+1: StateReplicate match=11 next=12
 2: StateReplicate match=11 next=12
 3: StateSnapshot match=0 next=11 paused pendingSnap=11
@ -132,7 +132,7 @@ stabilize 1
 status 1
 ----
-1: StateReplicate match=11 next=12 inactive
+1: StateReplicate match=11 next=12
 2: StateReplicate match=11 next=12
 3: StateReplicate match=11 next=12

View File

@ -52,8 +52,7 @@ type Progress struct {
 // RecentActive is true if the progress is recently active. Receiving any messages
 // from the corresponding follower indicates the progress is active.
 // RecentActive can be reset to false after an election timeout.
-//
-// TODO(tbg): the leader should always have this set to true.
+// This is always true on the leader.
 RecentActive bool
 // ProbeSent is used while this follower is in StateProbe. When ProbeSent is
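
A hedged sketch of reading this flag through the public Status API (the helper name and import path are illustrative assumptions): on the leader, its own entry in Progress now always reports RecentActive=true.

	package raftexample // hypothetical package, for illustration only

	import "go.etcd.io/etcd/raft/v3" // assumed import path

	// recentlyActive lists the ids whose Progress is currently marked
	// RecentActive. Progress is only populated on the leader, and per the
	// comment above the leader's own id is always included.
	func recentlyActive(rn *raft.RawNode) []uint64 {
		var ids []uint64
		for id, pr := range rn.Status().Progress {
			if pr.RecentActive {
				ids = append(ids, id)
			}
		}
		return ids
	}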