mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Verification package: Verifies given data-dir.
For now it verifies whether Backend.cindex is consistent with the WAL log, but it should be expanded to cover memberships & revisions.
This commit is contained in:
parent
6f8f506cf4
commit
47b28b600a
20
server/verify/doc.go
Normal file
20
server/verify/doc.go
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
// Copyright 2021 The etcd Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package verify
|
||||||
|
|
||||||
|
// Package verify analyzes the persistent state of etcd to find potential
|
||||||
|
// inconsistencies.
|
||||||
|
// In particular it covers cross-checking between different aspects of etcd
|
||||||
|
// storage like WAL & Backend.
|
138
server/verify/verify.go
Normal file
138
server/verify/verify.go
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
// Copyright 2021 The etcd Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package verify
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"go.etcd.io/etcd/raft/v3/raftpb"
|
||||||
|
"go.etcd.io/etcd/server/v3/datadir"
|
||||||
|
"go.etcd.io/etcd/server/v3/etcdserver/cindex"
|
||||||
|
"go.etcd.io/etcd/server/v3/mvcc/backend"
|
||||||
|
wal2 "go.etcd.io/etcd/server/v3/wal"
|
||||||
|
"go.etcd.io/etcd/server/v3/wal/walpb"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Environment-variable switch consulted by VerifyIfEnabled.
const (
	// ENV_VERIFY is the name of the environment variable that is read.
	ENV_VERIFY = "ETCD_VERIFY"
	// ENV_VERIFY_ALL_VALUE is the ENV_VERIFY value that turns verification on.
	ENV_VERIFY_ALL_VALUE = "all"
)
|
||||||
|
|
||||||
|
type Config struct {
|
||||||
|
// DataDir is a root directory where the data being verified are stored.
|
||||||
|
DataDir string
|
||||||
|
|
||||||
|
// ExactIndex requires consistent_index in backend exactly match the last committed WAL entry.
|
||||||
|
// Usually backend's consistent_index needs to be <= WAL.commit, but for backups the match
|
||||||
|
// is expected to be exact.
|
||||||
|
ExactIndex bool
|
||||||
|
|
||||||
|
Logger *zap.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify performs consistency checks of given etcd data-directory.
|
||||||
|
// The errors are reported as the returned error, but for some situations
|
||||||
|
// the function can also panic.
|
||||||
|
// The function is expected to work on not-in-use data model, i.e.
|
||||||
|
// no file-locks should be taken. Verify does not modified the data.
|
||||||
|
func Verify(cfg Config) error {
|
||||||
|
lg := cfg.Logger
|
||||||
|
if lg == nil {
|
||||||
|
lg = zap.NewNop()
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
lg.Info("verification of persisted state", zap.String("data-dir", cfg.DataDir))
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
lg.Error("verification of persisted state failed",
|
||||||
|
zap.String("data-dir", cfg.DataDir),
|
||||||
|
zap.Error(err))
|
||||||
|
} else if r := recover(); r != nil {
|
||||||
|
lg.Error("verification of persisted state failed",
|
||||||
|
zap.String("data-dir", cfg.DataDir))
|
||||||
|
panic(r)
|
||||||
|
} else {
|
||||||
|
lg.Info("verification of persisted state successful", zap.String("data-dir", cfg.DataDir))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
beConfig := backend.DefaultBackendConfig()
|
||||||
|
beConfig.Path = datadir.ToBackendFileName(cfg.DataDir)
|
||||||
|
beConfig.Logger = cfg.Logger
|
||||||
|
|
||||||
|
be := backend.New(beConfig)
|
||||||
|
defer be.Close()
|
||||||
|
|
||||||
|
_, hardstate, err := validateWal(cfg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Perform validation of consistency of membership between
|
||||||
|
// backend/members & WAL confstate (and maybe storev2 if still exists).
|
||||||
|
|
||||||
|
return validateConsistentIndex(cfg, hardstate, be)
|
||||||
|
}
|
||||||
|
|
||||||
|
// VerifyIfEnabled performs verification according to ETCD_VERIFY env settings.
|
||||||
|
// See Verify for more information.
|
||||||
|
func VerifyIfEnabled(cfg Config) error {
|
||||||
|
if os.Getenv(ENV_VERIFY) == ENV_VERIFY_ALL_VALUE {
|
||||||
|
return Verify(cfg)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustVerifyIfEnabled performs verification according to ETCD_VERIFY env settings
|
||||||
|
// and exits in case of found problems.
|
||||||
|
// See Verify for more information.
|
||||||
|
func MustVerifyIfEnabled(cfg Config) {
|
||||||
|
if err := VerifyIfEnabled(cfg); err != nil {
|
||||||
|
cfg.Logger.Panic("Verification failed",
|
||||||
|
zap.String("data-dir", cfg.DataDir),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateConsistentIndex(cfg Config, hardstate *raftpb.HardState, be backend.Backend) error {
|
||||||
|
tx := be.BatchTx()
|
||||||
|
ci := cindex.NewConsistentIndex(tx)
|
||||||
|
index := ci.ConsistentIndex()
|
||||||
|
if cfg.ExactIndex && index != hardstate.Commit {
|
||||||
|
return fmt.Errorf("backend.ConsistentIndex (%v) expected == WAL.HardState.commit (%v)", index, hardstate.Commit)
|
||||||
|
}
|
||||||
|
if index > hardstate.Commit {
|
||||||
|
return fmt.Errorf("backend.ConsistentIndex (%v) must be <= WAL.HardState.commit (%v)", index, hardstate.Commit)
|
||||||
|
}
|
||||||
|
cfg.Logger.Info("verification: consistentIndex OK", zap.Uint64("backend-consistent-index", index), zap.Uint64("hardstate-commit", hardstate.Commit))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateWal(cfg Config) (*walpb.Snapshot, *raftpb.HardState, error) {
|
||||||
|
walDir := datadir.ToWalDir(cfg.DataDir)
|
||||||
|
|
||||||
|
walSnaps, err := wal2.ValidSnapshotEntries(cfg.Logger, walDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := walSnaps[len(walSnaps)-1]
|
||||||
|
hardstate, err := wal2.Verify(cfg.Logger, walDir, snapshot)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
return &snapshot, hardstate, nil
|
||||||
|
}
|
@ -618,10 +618,11 @@ func ValidSnapshotEntries(lg *zap.Logger, walDir string) ([]walpb.Snapshot, erro
|
|||||||
// If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
|
// If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
|
||||||
// If the loaded snap doesn't match with the expected one, it will
|
// If the loaded snap doesn't match with the expected one, it will
|
||||||
// return error ErrSnapshotMismatch.
|
// return error ErrSnapshotMismatch.
|
||||||
func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) (*raftpb.HardState, error) {
|
||||||
var metadata []byte
|
var metadata []byte
|
||||||
var err error
|
var err error
|
||||||
var match bool
|
var match bool
|
||||||
|
var state raftpb.HardState
|
||||||
|
|
||||||
rec := &walpb.Record{}
|
rec := &walpb.Record{}
|
||||||
|
|
||||||
@ -630,14 +631,14 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
|||||||
}
|
}
|
||||||
names, nameIndex, err := selectWALFiles(lg, walDir, snap)
|
names, nameIndex, err := selectWALFiles(lg, walDir, snap)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// open wal files in read mode, so that there is no conflict
|
// open wal files in read mode, so that there is no conflict
|
||||||
// when the same WAL is opened elsewhere in write mode
|
// when the same WAL is opened elsewhere in write mode
|
||||||
rs, _, closer, err := openWALFiles(lg, walDir, names, nameIndex, false)
|
rs, _, closer, err := openWALFiles(lg, walDir, names, nameIndex, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
if closer != nil {
|
if closer != nil {
|
||||||
@ -652,7 +653,7 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
|||||||
switch rec.Type {
|
switch rec.Type {
|
||||||
case metadataType:
|
case metadataType:
|
||||||
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
|
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
|
||||||
return ErrMetadataConflict
|
return nil, ErrMetadataConflict
|
||||||
}
|
}
|
||||||
metadata = rec.Data
|
metadata = rec.Data
|
||||||
case crcType:
|
case crcType:
|
||||||
@ -660,7 +661,7 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
|||||||
// Current crc of decoder must match the crc of the record.
|
// Current crc of decoder must match the crc of the record.
|
||||||
// We need not match 0 crc, since the decoder is a new one at this point.
|
// We need not match 0 crc, since the decoder is a new one at this point.
|
||||||
if crc != 0 && rec.Validate(crc) != nil {
|
if crc != 0 && rec.Validate(crc) != nil {
|
||||||
return ErrCRCMismatch
|
return nil, ErrCRCMismatch
|
||||||
}
|
}
|
||||||
decoder.updateCRC(rec.Crc)
|
decoder.updateCRC(rec.Crc)
|
||||||
case snapshotType:
|
case snapshotType:
|
||||||
@ -668,7 +669,7 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
|||||||
pbutil.MustUnmarshal(&loadedSnap, rec.Data)
|
pbutil.MustUnmarshal(&loadedSnap, rec.Data)
|
||||||
if loadedSnap.Index == snap.Index {
|
if loadedSnap.Index == snap.Index {
|
||||||
if loadedSnap.Term != snap.Term {
|
if loadedSnap.Term != snap.Term {
|
||||||
return ErrSnapshotMismatch
|
return nil, ErrSnapshotMismatch
|
||||||
}
|
}
|
||||||
match = true
|
match = true
|
||||||
}
|
}
|
||||||
@ -676,22 +677,23 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
|
|||||||
// are not necessary for validating the WAL contents
|
// are not necessary for validating the WAL contents
|
||||||
case entryType:
|
case entryType:
|
||||||
case stateType:
|
case stateType:
|
||||||
|
pbutil.MustUnmarshal(&state, rec.Data)
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unexpected block type %d", rec.Type)
|
return nil, fmt.Errorf("unexpected block type %d", rec.Type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We do not have to read out all the WAL entries
|
// We do not have to read out all the WAL entries
|
||||||
// as the decoder is opened in read mode.
|
// as the decoder is opened in read mode.
|
||||||
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if !match {
|
if !match {
|
||||||
return ErrSnapshotNotFound
|
return nil, ErrSnapshotNotFound
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return &state, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// cut closes current file written and creates a new one ready to append.
|
// cut closes current file written and creates a new one ready to append.
|
||||||
|
@ -27,10 +27,12 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
||||||
"go.etcd.io/etcd/pkg/v3/pbutil"
|
"go.etcd.io/etcd/pkg/v3/pbutil"
|
||||||
"go.etcd.io/etcd/raft/v3/raftpb"
|
"go.etcd.io/etcd/raft/v3/raftpb"
|
||||||
"go.etcd.io/etcd/server/v3/wal/walpb"
|
"go.etcd.io/etcd/server/v3/wal/walpb"
|
||||||
|
"go.uber.org/zap/zaptest"
|
||||||
|
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
@ -231,14 +233,14 @@ func TestOpenAtIndex(t *testing.T) {
|
|||||||
// The test creates a WAL directory and cuts out multiple WAL files. Then
|
// The test creates a WAL directory and cuts out multiple WAL files. Then
|
||||||
// it corrupts one of the files by completely truncating it.
|
// it corrupts one of the files by completely truncating it.
|
||||||
func TestVerify(t *testing.T) {
|
func TestVerify(t *testing.T) {
|
||||||
|
lg := zaptest.NewLogger(t)
|
||||||
walDir, err := ioutil.TempDir(t.TempDir(), "waltest")
|
walDir, err := ioutil.TempDir(t.TempDir(), "waltest")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
defer os.RemoveAll(walDir)
|
|
||||||
|
|
||||||
// create WAL
|
// create WAL
|
||||||
w, err := Create(zap.NewExample(), walDir, nil)
|
w, err := Create(lg, walDir, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -255,11 +257,15 @@ func TestVerify(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hs := raftpb.HardState{Term: 1, Vote: 3, Commit: 5}
|
||||||
|
assert.NoError(t, w.Save(hs, nil))
|
||||||
|
|
||||||
// to verify the WAL is not corrupted at this point
|
// to verify the WAL is not corrupted at this point
|
||||||
err = Verify(zap.NewExample(), walDir, walpb.Snapshot{})
|
hardstate, err := Verify(lg, walDir, walpb.Snapshot{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("expected a nil error, got %v", err)
|
t.Errorf("expected a nil error, got %v", err)
|
||||||
}
|
}
|
||||||
|
assert.Equal(t, hs, *hardstate)
|
||||||
|
|
||||||
walFiles, err := ioutil.ReadDir(walDir)
|
walFiles, err := ioutil.ReadDir(walDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -272,7 +278,7 @@ func TestVerify(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = Verify(zap.NewExample(), walDir, walpb.Snapshot{})
|
_, err = Verify(lg, walDir, walpb.Snapshot{})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Error("expected a non-nil error, got nil")
|
t.Error("expected a non-nil error, got nil")
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user