Save raftpb.ConfState in the backend.

This makes the (bbolt) backend a full-featured snapshot in terms of WAL/raft,
i.e. it carries:
  - commit (applied_index)
  - confState

Benefits:
  - The backend becomes a point-in-time definition sufficient to start
    replaying the WAL, since applied_index & confState are kept in a
    consistent state.
  - In case of emergency, the backend state can be used for recovery.
Piotr Tabor
2021-05-13 12:37:43 +02:00
parent 3cb1ba4b2b
commit 865df75714
10 changed files with 283 additions and 64 deletions
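To make the benefit concrete, below is a minimal, self-contained sketch of the idea: store the applied index ("consistent_index") and the raft ConfState together in the backend's meta bucket (roughly what a pre-commit hook such as OnPreCommitUnsafe would do) and read them back for recovery. This uses bbolt directly rather than etcd's real backend/hook types; the bucket/key names and JSON encoding are assumptions that mirror the commit, not the authoritative implementation.

```go
// Sketch only: persist applied_index + ConfState in the backend meta bucket,
// so the backend alone pins a point in time from which WAL replay can start.
package main

import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"log"

	bolt "go.etcd.io/bbolt"
	"go.etcd.io/etcd/raft/v3/raftpb"
)

var (
	metaBucket   = []byte("meta")             // assumption: meta bucket name
	indexKey     = []byte("consistent_index") // assumption: key for applied index
	confStateKey = []byte("confState")        // assumption: key for ConfState
)

// saveMeta writes both values in one transaction, analogous to a pre-commit hook.
func saveMeta(db *bolt.DB, appliedIndex uint64, cs raftpb.ConfState) error {
	return db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(metaBucket)
		if err != nil {
			return err
		}
		idx := make([]byte, 8)
		binary.BigEndian.PutUint64(idx, appliedIndex)
		if err := b.Put(indexKey, idx); err != nil {
			return err
		}
		raw, err := json.Marshal(&cs) // ConfState serialized as JSON
		if err != nil {
			return err
		}
		return b.Put(confStateKey, raw)
	})
}

// loadMeta reads both values back, e.g. during emergency recovery.
func loadMeta(db *bolt.DB) (uint64, raftpb.ConfState, error) {
	var index uint64
	var cs raftpb.ConfState
	err := db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket(metaBucket)
		if b == nil {
			return fmt.Errorf("meta bucket not found")
		}
		if v := b.Get(indexKey); v != nil {
			index = binary.BigEndian.Uint64(v)
		}
		if v := b.Get(confStateKey); v != nil {
			return json.Unmarshal(v, &cs)
		}
		return nil
	})
	return index, cs, err
}

func main() {
	db, err := bolt.Open("backend.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := saveMeta(db, 42, raftpb.ConfState{Voters: []uint64{1, 2, 3}}); err != nil {
		log.Fatal(err)
	}
	idx, cs, err := loadMeta(db)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("applied_index=%d confState=%+v\n", idx, cs)
}
```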


@@ -19,6 +19,7 @@ import (
"encoding/json"
"fmt"
"io/ioutil"
"math"
"net/http"
"os"
"path/filepath"
@@ -604,12 +605,14 @@ func TestApplyConfChangeShouldStop(t *testing.T) {
Node: newNodeNop(),
transport: newNopTransporter(),
})
lg := zaptest.NewLogger(t)
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
id: 1,
r: *r,
cluster: cl,
beHooks: &backendHooks{lg: lg},
}
cc := raftpb.ConfChange{
Type: raftpb.ConfChangeRemoveNode,
@@ -638,22 +641,26 @@ func TestApplyConfChangeShouldStop(t *testing.T) {
// TestApplyConfigChangeUpdatesConsistIndex ensures a config change also updates the consistIndex
// where consistIndex equals to applied index.
func TestApplyConfigChangeUpdatesConsistIndex(t *testing.T) {
lg := zaptest.NewLogger(t)
cl := membership.NewCluster(zaptest.NewLogger(t))
cl.SetStore(v2store.New())
cl.AddMember(&membership.Member{ID: types.ID(1)}, true)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
Node: newNodeNop(),
transport: newNopTransporter(),
})
be, _ := betesting.NewDefaultTmpBackend(t)
defer betesting.Close(t, be)
cindex.CreateMetaBucket(be.BatchTx())
ci := cindex.NewConsistentIndex(be)
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
id: 1,
r: *r,
r: *realisticRaftNode(lg),
cluster: cl,
w: wait.New(),
consistIndex: cindex.NewFakeConsistentIndex(0),
consistIndex: ci,
beHooks: &backendHooks{lg: lg, indexer: ci},
}
// create EntryConfChange entry
@@ -680,29 +687,61 @@ func TestApplyConfigChangeUpdatesConsistIndex(t *testing.T) {
if consistIndex != appliedi {
t.Fatalf("consistIndex = %v, want %v", consistIndex, appliedi)
}
t.Run("verify-backend", func(t *testing.T) {
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
srv.beHooks.OnPreCommitUnsafe(tx)
assert.Equal(t, raftpb.ConfState{Voters: []uint64{2}}, *membership.UnsafeConfStateFromBackend(lg, tx))
})
assert.Equal(t, consistIndex, cindex.ReadConsistentIndex(be.BatchTx()))
}
func realisticRaftNode(lg *zap.Logger) *raftNode {
storage := raft.NewMemoryStorage()
storage.SetHardState(raftpb.HardState{Commit: 0, Term: 0})
c := &raft.Config{
ID: 1,
ElectionTick: 10,
HeartbeatTick: 1,
Storage: storage,
MaxSizePerMsg: math.MaxUint64,
MaxInflightMsgs: 256,
}
n := raft.RestartNode(c)
r := newRaftNode(raftNodeConfig{
lg: lg,
Node: n,
transport: newNopTransporter(),
})
return r
}
// TestApplyMultiConfChangeShouldStop ensures that apply will return shouldStop
// if the local member is removed along with other conf updates.
func TestApplyMultiConfChangeShouldStop(t *testing.T) {
cl := membership.NewCluster(zaptest.NewLogger(t))
lg := zaptest.NewLogger(t)
cl := membership.NewCluster(lg)
cl.SetStore(v2store.New())
for i := 1; i <= 5; i++ {
cl.AddMember(&membership.Member{ID: types.ID(i)}, true)
}
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
Node: newNodeNop(),
transport: newNopTransporter(),
})
ci := cindex.NewFakeConsistentIndex(0)
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
id: 2,
r: *r,
cluster: cl,
w: wait.New(),
consistIndex: cindex.NewFakeConsistentIndex(0),
consistIndex: ci,
beHooks: &backendHooks{lg: lg, indexer: ci},
}
ents := []raftpb.Entry{}
for i := 1; i <= 4; i++ {
@@ -1036,12 +1075,13 @@ func TestSnapshot(t *testing.T) {
// TestSnapshotOrdering ensures raft persists snapshot onto disk before
// snapshot db is applied.
func TestSnapshotOrdering(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNopReadyNode()
st := v2store.New()
cl := membership.NewCluster(zaptest.NewLogger(t))
cl := membership.NewCluster(lg)
cl.SetStore(st)
testdir, err := ioutil.TempDir(os.TempDir(), "testsnapdir")
testdir, err := ioutil.TempDir(t.TempDir(), "testsnapdir")
if err != nil {
t.Fatalf("couldn't open tempdir (%v)", err)
}
@@ -1056,29 +1096,30 @@ func TestSnapshotOrdering(t *testing.T) {
p := mockstorage.NewStorageRecorderStream(testdir)
tr, snapDoneC := newSnapTransporter(snapdir)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
Node: n,
transport: tr,
storage: p,
raftStorage: rs,
})
be, tmpPath := betesting.NewDefaultTmpBackend(t)
defer os.RemoveAll(tmpPath)
be, _ := betesting.NewDefaultTmpBackend(t)
ci := cindex.NewConsistentIndex(be)
s := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
Cfg: config.ServerConfig{Logger: zap.NewExample(), DataDir: testdir, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
lg: lg,
Cfg: config.ServerConfig{Logger: lg, DataDir: testdir, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
r: *r,
v2store: st,
snapshotter: snap.New(zap.NewExample(), snapdir),
snapshotter: snap.New(lg, snapdir),
cluster: cl,
SyncTicker: &time.Ticker{},
consistIndex: cindex.NewConsistentIndex(be),
consistIndex: ci,
beHooks: &backendHooks{lg: lg, indexer: ci},
}
s.applyV2 = &applierV2store{store: s.v2store, cluster: s.cluster}
s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, mvcc.StoreConfig{})
s.kv = mvcc.New(lg, be, &lease.FakeLessor{}, mvcc.StoreConfig{})
s.be = be
s.start()
@@ -1190,12 +1231,13 @@ func TestTriggerSnap(t *testing.T) {
// TestConcurrentApplyAndSnapshotV3 will send out snapshots concurrently with
// proposals.
func TestConcurrentApplyAndSnapshotV3(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNopReadyNode()
st := v2store.New()
cl := membership.NewCluster(zaptest.NewLogger(t))
cl := membership.NewCluster(lg)
cl.SetStore(st)
testdir, err := ioutil.TempDir(os.TempDir(), "testsnapdir")
testdir, err := ioutil.TempDir(t.TempDir(), "testsnapdir")
if err != nil {
t.Fatalf("Couldn't open tempdir (%v)", err)
}
@@ -1207,31 +1249,30 @@ func TestConcurrentApplyAndSnapshotV3(t *testing.T) {
rs := raft.NewMemoryStorage()
tr, snapDoneC := newSnapTransporter(testdir)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
Node: n,
transport: tr,
storage: mockstorage.NewStorageRecorder(testdir),
raftStorage: rs,
})
be, tmpPath := betesting.NewDefaultTmpBackend(t)
defer func() {
os.RemoveAll(tmpPath)
}()
be, _ := betesting.NewDefaultTmpBackend(t)
ci := cindex.NewConsistentIndex(be)
s := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
Cfg: config.ServerConfig{Logger: zap.NewExample(), DataDir: testdir, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
lg: lg,
Cfg: config.ServerConfig{Logger: lg, DataDir: testdir, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
r: *r,
v2store: st,
snapshotter: snap.New(zap.NewExample(), testdir),
snapshotter: snap.New(lg, testdir),
cluster: cl,
SyncTicker: &time.Ticker{},
consistIndex: cindex.NewConsistentIndex(be),
consistIndex: ci,
beHooks: &backendHooks{lg: lg, indexer: ci},
}
s.applyV2 = &applierV2store{store: s.v2store, cluster: s.cluster}
s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, mvcc.StoreConfig{})
s.kv = mvcc.New(lg, be, &lease.FakeLessor{}, mvcc.StoreConfig{})
s.be = be
s.start()
@@ -1288,6 +1329,7 @@ func TestConcurrentApplyAndSnapshotV3(t *testing.T) {
// TestAddMember tests AddMember can propose and perform node addition.
func TestAddMember(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNodeConfChangeCommitterRecorder()
n.readyc <- raft.Ready{
SoftState: &raft.SoftState{RaftState: raft.StateLeader},
@@ -1296,7 +1338,7 @@ func TestAddMember(t *testing.T) {
st := v2store.New()
cl.SetStore(st)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
Node: n,
raftStorage: raft.NewMemoryStorage(),
storage: mockstorage.NewStorageRecorder(""),
@@ -1304,13 +1346,14 @@ func TestAddMember(t *testing.T) {
})
s := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
r: *r,
v2store: st,
cluster: cl,
reqIDGen: idutil.NewGenerator(0, time.Time{}),
SyncTicker: &time.Ticker{},
consistIndex: cindex.NewFakeConsistentIndex(0),
beHooks: &backendHooks{lg: lg},
}
s.start()
m := membership.Member{ID: 1234, RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"foo"}}}
@@ -1332,6 +1375,7 @@ func TestAddMember(t *testing.T) {
// TestRemoveMember tests RemoveMember can propose and perform node removal.
func TestRemoveMember(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNodeConfChangeCommitterRecorder()
n.readyc <- raft.Ready{
SoftState: &raft.SoftState{RaftState: raft.StateLeader},
@@ -1341,7 +1385,7 @@ func TestRemoveMember(t *testing.T) {
cl.SetStore(v2store.New())
cl.AddMember(&membership.Member{ID: 1234}, true)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
Node: n,
raftStorage: raft.NewMemoryStorage(),
storage: mockstorage.NewStorageRecorder(""),
@@ -1356,6 +1400,7 @@ func TestRemoveMember(t *testing.T) {
reqIDGen: idutil.NewGenerator(0, time.Time{}),
SyncTicker: &time.Ticker{},
consistIndex: cindex.NewFakeConsistentIndex(0),
beHooks: &backendHooks{lg: lg},
}
s.start()
_, err := s.RemoveMember(context.Background(), 1234)
@@ -1376,6 +1421,7 @@ func TestRemoveMember(t *testing.T) {
// TestUpdateMember tests RemoveMember can propose and perform node update.
func TestUpdateMember(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNodeConfChangeCommitterRecorder()
n.readyc <- raft.Ready{
SoftState: &raft.SoftState{RaftState: raft.StateLeader},
@@ -1385,7 +1431,7 @@ func TestUpdateMember(t *testing.T) {
cl.SetStore(st)
cl.AddMember(&membership.Member{ID: 1234}, true)
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
Node: n,
raftStorage: raft.NewMemoryStorage(),
storage: mockstorage.NewStorageRecorder(""),
@@ -1393,13 +1439,14 @@ func TestUpdateMember(t *testing.T) {
})
s := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
r: *r,
v2store: st,
cluster: cl,
reqIDGen: idutil.NewGenerator(0, time.Time{}),
SyncTicker: &time.Ticker{},
consistIndex: cindex.NewFakeConsistentIndex(0),
beHooks: &backendHooks{lg: lg},
}
s.start()
wm := membership.Member{ID: 1234, RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"http://127.0.0.1:1"}}}
@@ -1422,6 +1469,7 @@ func TestUpdateMember(t *testing.T) {
// TODO: test server could stop itself when being removed
func TestPublish(t *testing.T) {
lg := zaptest.NewLogger(t)
n := newNodeRecorder()
ch := make(chan interface{}, 1)
// simulate that request has gone through consensus
@@ -1430,11 +1478,11 @@ func TestPublish(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
lg: lg,
readych: make(chan struct{}),
Cfg: config.ServerConfig{Logger: zap.NewExample(), TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
Cfg: config.ServerConfig{Logger: lg, TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
id: 1,
r: *newRaftNode(raftNodeConfig{lg: zap.NewExample(), Node: n}),
r: *newRaftNode(raftNodeConfig{lg: lg, Node: n}),
attributes: membership.Attributes{Name: "node1", ClientURLs: []string{"http://a", "http://b"}},
cluster: &membership.RaftCluster{},
w: w,
@@ -1476,16 +1524,17 @@ func TestPublish(t *testing.T) {
// TestPublishStopped tests that publish will be stopped if server is stopped.
func TestPublishStopped(t *testing.T) {
lg := zaptest.NewLogger(t)
ctx, cancel := context.WithCancel(context.Background())
r := newRaftNode(raftNodeConfig{
lg: zap.NewExample(),
lg: lg,
Node: newNodeNop(),
transport: newNopTransporter(),
})
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
Cfg: config.ServerConfig{Logger: zap.NewExample(), TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
lg: lg,
Cfg: config.ServerConfig{Logger: lg, TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
r: *r,
cluster: &membership.RaftCluster{},
w: mockwait.NewNop(),
@@ -1504,13 +1553,15 @@ func TestPublishStopped(t *testing.T) {
// TestPublishRetry tests that publish will keep retry until success.
func TestPublishRetry(t *testing.T) {
lg := zaptest.NewLogger(t)
ctx, cancel := context.WithCancel(context.Background())
n := newNodeRecorderStream()
srv := &EtcdServer{
lgMu: new(sync.RWMutex),
lg: zap.NewExample(),
Cfg: config.ServerConfig{Logger: zap.NewExample(), TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
r: *newRaftNode(raftNodeConfig{lg: zap.NewExample(), Node: n}),
lg: lg,
Cfg: config.ServerConfig{Logger: lg, TickMs: 1, SnapshotCatchUpEntries: DefaultSnapshotCatchUpEntries},
r: *newRaftNode(raftNodeConfig{lg: lg, Node: n}),
w: mockwait.NewNop(),
stopping: make(chan struct{}),
reqIDGen: idutil.NewGenerator(0, time.Time{}),