Fix ETCDCTL_API=2 etcdctl backup --with-v3 consistent index consistency

Prior to this CL, `ETCDCTL_API=2 etcdctl backup --with-v3` was redacting the WAL log
(by removing some entries), but was NOT updating consistent_index in the backend.
Also, the WAL editing logic was buggy, as it didn't take into consideration the fact
that when TERM changes, there can be entries with duplicated indexes in
the log. So it's NOT sufficient to subtract the number of removed entries to
get accurate log indexes.

The PR replaces the removing and shifting of WAL entries with replacing them with no-op entries.
Thanks to this, consistent-index references stay up to date.

The PR also:
  - updates the 'verification' logic to check that consistent_index does not lag behind the last snapshot
  - adds env-gated execution of the verification framework in `etcdctl backup`.

Tested with:
```
(./build.sh && cd tests && EXPECT_DEBUG=TRUE 'env' 'go' 'test' '-timeout=300m' 'go.etcd.io/etcd/tests/v3/e2e' -run=TestCtlV2Backup --count=1000 2>&1 | tee TestCtlV2BackupV3.log)
```
This commit is contained in:
Piotr Tabor 2021-04-28 22:46:42 +02:00
parent adc365e14f
commit 911204cd76
5 changed files with 84 additions and 49 deletions

View File

@ -32,6 +32,7 @@ import (
"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
"go.etcd.io/etcd/server/v3/etcdserver/cindex"
"go.etcd.io/etcd/server/v3/mvcc/backend"
"go.etcd.io/etcd/server/v3/verify"
"go.etcd.io/etcd/server/v3/wal"
"go.etcd.io/etcd/server/v3/wal/walpb"
@ -117,12 +118,13 @@ func handleBackup(c *cli.Context) error {
lg.Fatal("failed creating backup snapshot dir", zap.String("dest-snap", destSnap), zap.Error(err))
}
destDbPath := datadir.ToBackendFileName(destDir)
srcDbPath := datadir.ToBackendFileName(srcDir)
desired := newDesiredCluster()
walsnap := saveSnap(lg, destSnap, srcSnap, &desired)
metadata, state, ents := loadWAL(lg, srcWAL, walsnap, withV3)
destDbPath := datadir.ToBackendFileName(destDir)
saveDB(lg, destDbPath, datadir.ToBackendFileName(srcDir), state.Commit, &desired, withV3)
metadata, state, ents := translateWAL(lg, srcWAL, walsnap, withV3)
saveDB(lg, destDbPath, srcDbPath, state.Commit, &desired, withV3)
neww, err := wal.Create(lg, destWAL, pbutil.MustMarshal(&metadata))
if err != nil {
@ -183,7 +185,7 @@ func mustTranslateV2store(lg *zap.Logger, storeData []byte, desired *desiredClus
return outputData
}
func loadWAL(lg *zap.Logger, srcWAL string, walsnap walpb.Snapshot, v3 bool) (etcdserverpb.Metadata, raftpb.HardState, []raftpb.Entry) {
func translateWAL(lg *zap.Logger, srcWAL string, walsnap walpb.Snapshot, v3 bool) (etcdserverpb.Metadata, raftpb.HardState, []raftpb.Entry) {
w, err := wal.OpenForRead(lg, srcWAL, walsnap)
if err != nil {
lg.Fatal("wal.OpenForRead failed", zap.Error(err))
@ -202,18 +204,17 @@ func loadWAL(lg *zap.Logger, srcWAL string, walsnap walpb.Snapshot, v3 bool) (et
re := path.Join(membership.StoreMembersPrefix, "[[:xdigit:]]{1,16}", "attributes")
memberAttrRE := regexp.MustCompile(re)
removed := uint64(0)
i := 0
remove := func() {
ents = append(ents[:i], ents[i+1:]...)
removed++
i--
}
for i = 0; i < len(ents); i++ {
ents[i].Index -= removed
for i := 0; i < len(ents); i++ {
// Replacing WAL entries with 'dummy' entries avoids
// complicated entry shifting and the risk of other data (like consistent_index)
// running out of sync.
// Also, moving entries and computing offsets would get complicated if
// TERM changes (so there are superfluous entries from the previous term).
if ents[i].Type == raftpb.EntryConfChange {
lg.Info("ignoring EntryConfChange raft entry")
remove()
raftEntryToNoOp(&ents[i])
continue
}
@ -227,18 +228,20 @@ func loadWAL(lg *zap.Logger, srcWAL string, walsnap walpb.Snapshot, v3 bool) (et
}
if v2Req != nil && v2Req.Method == "PUT" && memberAttrRE.MatchString(v2Req.Path) {
lg.Info("ignoring member attribute update on", zap.String("v2Req.Path", v2Req.Path))
remove()
lg.Info("ignoring member attribute update on",
zap.Stringer("entry", &ents[i]),
zap.String("v2Req.Path", v2Req.Path))
raftEntryToNoOp(&ents[i])
continue
}
if v2Req != nil {
continue
lg.Debug("preserving log entry", zap.Stringer("entry", &ents[i]))
}
if raftReq.ClusterMemberAttrSet != nil {
lg.Info("ignoring cluster_member_attr_set")
remove()
raftEntryToNoOp(&ents[i])
continue
}
@ -247,14 +250,20 @@ func loadWAL(lg *zap.Logger, srcWAL string, walsnap walpb.Snapshot, v3 bool) (et
continue
}
lg.Info("ignoring v3 raft entry")
remove()
raftEntryToNoOp(&ents[i])
}
state.Commit -= removed
var metadata etcdserverpb.Metadata
pbutil.MustUnmarshal(&metadata, wmetadata)
return metadata, state, ents
}
// raftEntryToNoOp overwrites the given entry in place with an empty entry of
// type EntryNormal, preserving only the entry's Term and Index.
//
// Empty (dummy) entries are what RAFT sends when a new leader is being
// elected. They carry no change to the data model, so it is safe to
// substitute them for entries that should be ignored.
func raftEntryToNoOp(entry *raftpb.Entry) {
	term, index := entry.Term, entry.Index
	// Data is left at its zero value (nil), which makes the entry a no-op.
	*entry = raftpb.Entry{
		Type:  raftpb.EntryNormal,
		Term:  term,
		Index: index,
	}
}
// saveDB copies the v3 backend and strips cluster information.
func saveDB(lg *zap.Logger, destDB, srcDB string, idx uint64, desired *desiredCluster, v3 bool) {
@ -272,7 +281,7 @@ func saveDB(lg *zap.Logger, destDB, srcDB string, idx uint64, desired *desiredCl
select {
case src = <-ch:
case <-time.After(time.Second):
lg.Fatal("waiting to acquire lock on", zap.String("srcDB", srcDB))
lg.Fatal("timed out waiting to acquire lock on", zap.String("srcDB", srcDB))
src = <-ch
}
defer src.Close()
@ -312,10 +321,13 @@ func saveDB(lg *zap.Logger, destDB, srcDB string, idx uint64, desired *desiredCl
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
tx.UnsafeCreateBucket([]byte("meta"))
cindex.UnsafeCreateMetaBucket(tx)
ci := cindex.NewConsistentIndex(tx)
ci.SetConsistentIndex(idx)
ci.UnsafeSave(tx)
} else {
// Thanks to translateWAL not moving entries, but just replacing them with
// 'empty', there is no need to update the consistency index.
}
}

View File

@ -23,9 +23,9 @@ import (
)
var (
metaBucketName = []byte("meta")
MetaBucketName = []byte("meta")
consistentIndexKeyName = []byte("consistent_index")
ConsistentIndexKeyName = []byte("consistent_index")
)
// ConsistentIndexer is an interface that wraps the Get/Set/Save method for consistentIndex.
@ -52,14 +52,11 @@ type consistentIndex struct {
// it caches the "consistent_index" key's value. Accessed
// through atomics so must be 64-bit aligned.
consistentIndex uint64
// bytesBuf8 is a byte slice of length 8
// to avoid a repetitive allocation in saveIndex.
bytesBuf8 []byte
mutex sync.Mutex
mutex sync.Mutex
}
func NewConsistentIndex(tx backend.BatchTx) ConsistentIndexer {
return &consistentIndex{tx: tx, bytesBuf8: make([]byte, 8)}
return &consistentIndex{tx: tx}
}
func (ci *consistentIndex) ConsistentIndex() uint64 {
@ -69,14 +66,7 @@ func (ci *consistentIndex) ConsistentIndex() uint64 {
}
ci.mutex.Lock()
defer ci.mutex.Unlock()
ci.tx.Lock()
defer ci.tx.Unlock()
_, vs := ci.tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
if len(vs) == 0 {
return 0
}
v := binary.BigEndian.Uint64(vs[0])
atomic.StoreUint64(&ci.consistentIndex, v)
v := ReadConsistentIndex(ci.tx)
return v
}
@ -85,11 +75,16 @@ func (ci *consistentIndex) SetConsistentIndex(v uint64) {
}
func (ci *consistentIndex) UnsafeSave(tx backend.BatchTx) {
bs := ci.bytesBuf8
binary.BigEndian.PutUint64(bs, ci.consistentIndex)
index := atomic.LoadUint64(&ci.consistentIndex)
if index == 0 {
// Never save 0, as it means that we have not loaded the real index yet.
return
}
bs := make([]byte, 8) // this is kept on stack (not heap) so its quick.
binary.BigEndian.PutUint64(bs, index)
// put the index into the underlying backend
// tx has been locked in TxnBegin, so there is no need to lock it again
tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
tx.UnsafePut(MetaBucketName, ConsistentIndexKeyName, bs)
}
func (ci *consistentIndex) SetBatchTx(tx backend.BatchTx) {
@ -112,3 +107,26 @@ func (f *fakeConsistentIndex) SetConsistentIndex(index uint64) {
// UnsafeSave is a no-op for the fake index: the body is empty, so nothing is
// written to tx.
func (f *fakeConsistentIndex) UnsafeSave(tx backend.BatchTx) {}
// SetBatchTx is a no-op for the fake index: the body is empty, so the given
// tx is ignored.
func (f *fakeConsistentIndex) SetBatchTx(tx backend.BatchTx) {}
// UnsafeCreateMetaBucket creates the bucket named by MetaBucketName using the
// given batch transaction. It does no locking of its own.
// NOTE(review): callers visible in this change invoke it between tx.Lock()
// and tx.Unlock() — confirm that this is required of all callers.
func UnsafeCreateMetaBucket(tx backend.BatchTx) {
	bucket := MetaBucketName
	tx.UnsafeCreateBucket(bucket)
}
// unsafeReadConsistentIndex loads the consistent index stored under
// ConsistentIndexKeyName in the MetaBucketName bucket, using the given
// transaction. It returns 0 if no value is found.
// The function does no locking of its own.
func unsafeReadConsistentIndex(tx backend.ReadTx) uint64 {
	_, values := tx.UnsafeRange(MetaBucketName, ConsistentIndexKeyName, nil, 0)
	if len(values) > 0 {
		// The stored value is decoded as a big-endian uint64.
		return binary.BigEndian.Uint64(values[0])
	}
	return 0
}
// ReadConsistentIndex loads the consistent index using the given transaction,
// holding the transaction's lock for the duration of the read.
// It returns 0 if the data is not found.
func ReadConsistentIndex(tx backend.ReadTx) uint64 {
	tx.Lock()
	// Deferred so the lock is released even if the read panics.
	defer tx.Unlock()

	index := unsafeReadConsistentIndex(tx)
	return index
}

View File

@ -34,7 +34,7 @@ func TestConsistentIndex(t *testing.T) {
t.Fatal("batch tx is nil")
}
tx.Lock()
tx.UnsafeCreateBucket(metaBucketName)
UnsafeCreateMetaBucket(tx)
tx.Unlock()
be.ForceCommit()
r := rand.Uint64()
@ -50,6 +50,7 @@ func TestConsistentIndex(t *testing.T) {
be.Close()
b := backend.NewDefaultBackend(tmpPath)
defer b.Close()
ci.SetConsistentIndex(0)
ci.SetBatchTx(b.BatchTx())
index = ci.ConsistentIndex()
@ -62,8 +63,6 @@ func TestConsistentIndex(t *testing.T) {
if index != r {
t.Errorf("expected %d,got %d", r, index)
}
b.Close()
}
func TestFakeConsistentIndex(t *testing.T) {

View File

@ -2256,6 +2256,9 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
clone := s.v2store.Clone()
// commit kv to write metadata (for example: consistent index) to disk.
//
// This guarantees that Backend's consistent_index is >= index of last snapshot.
//
// KV().commit() updates the consistent index in backend.
// All operations that update consistent index must be called sequentially
// from applyAll function.

View File

@ -76,7 +76,7 @@ func Verify(cfg Config) error {
be := backend.New(beConfig)
defer be.Close()
_, hardstate, err := validateWal(cfg)
snapshot, hardstate, err := validateWal(cfg)
if err != nil {
return err
}
@ -84,7 +84,7 @@ func Verify(cfg Config) error {
// TODO: Perform validation of consistency of membership between
// backend/members & WAL confstate (and maybe storev2 if still exists).
return validateConsistentIndex(cfg, hardstate, be)
return validateConsistentIndex(cfg, hardstate, snapshot, be)
}
// VerifyIfEnabled performs verification according to ETCD_VERIFY env settings.
@ -101,22 +101,25 @@ func VerifyIfEnabled(cfg Config) error {
// See Verify for more information.
func MustVerifyIfEnabled(cfg Config) {
if err := VerifyIfEnabled(cfg); err != nil {
cfg.Logger.Panic("Verification failed",
cfg.Logger.Fatal("Verification failed",
zap.String("data-dir", cfg.DataDir),
zap.Error(err))
}
}
func validateConsistentIndex(cfg Config, hardstate *raftpb.HardState, be backend.Backend) error {
func validateConsistentIndex(cfg Config, hardstate *raftpb.HardState, snapshot *walpb.Snapshot, be backend.Backend) error {
tx := be.BatchTx()
ci := cindex.NewConsistentIndex(tx)
index := ci.ConsistentIndex()
index := cindex.ReadConsistentIndex(tx)
if cfg.ExactIndex && index != hardstate.Commit {
return fmt.Errorf("backend.ConsistentIndex (%v) expected == WAL.HardState.commit (%v)", index, hardstate.Commit)
}
if index > hardstate.Commit {
return fmt.Errorf("backend.ConsistentIndex (%v) must be <= WAL.HardState.commit (%v)", index, hardstate.Commit)
}
if index < snapshot.Index {
return fmt.Errorf("backend.ConsistentIndex (%v) must be >= last snapshot index (%v)", index, snapshot.Index)
}
cfg.Logger.Info("verification: consistentIndex OK", zap.Uint64("backend-consistent-index", index), zap.Uint64("hardstate-commit", hardstate.Commit))
return nil
}