Merge pull request #15043 from ptabor/20221223-log-diagnostics

etcd-dump-logs: Expand to allow diagnosing CRC corrupted problems in WAL log
This commit is contained in:
Piotr Tabor 2022-12-30 10:40:10 +01:00 committed by GitHub
commit 4ae4d9fe6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 391 additions and 180 deletions

1
.gitignore vendored
View File

@ -34,3 +34,4 @@ hack/tls-setup/certs
/tools/proto-annotations/proto-annotations
/tools/benchmark/benchmark
/out
/etcd-dump-logs

View File

@ -33,6 +33,13 @@ const minSectorSize = 512
// frameSizeBytes is frame size in bytes, including record size and padding size.
const frameSizeBytes = 8
type Decoder interface {
Decode(rec *walpb.Record) error
LastOffset() int64
LastCRC() uint32
UpdateCRC(prevCrc uint32)
}
type decoder struct {
mu sync.Mutex
brs []*fileutil.FileBufReader
@ -40,9 +47,14 @@ type decoder struct {
// lastValidOff file offset following the last valid decoded record
lastValidOff int64
crc hash.Hash32
// continueOnCrcError - causes the decoder to continue working even in case of crc mismatch.
// This is a desired mode for tools performing inspection of the corrupted WAL logs.
// See comments on 'Decode' method for semantic.
continueOnCrcError bool
}
func newDecoder(r ...fileutil.FileReader) *decoder {
func NewDecoderAdvanced(continueOnCrcError bool, r ...fileutil.FileReader) Decoder {
readers := make([]*fileutil.FileBufReader, len(r))
for i := range r {
readers[i] = fileutil.NewFileBufReader(r[i])
@ -50,10 +62,20 @@ func newDecoder(r ...fileutil.FileReader) *decoder {
return &decoder{
brs: readers,
crc: crc.New(0, crcTable),
continueOnCrcError: continueOnCrcError,
}
}
func (d *decoder) decode(rec *walpb.Record) error {
func NewDecoder(r ...fileutil.FileReader) Decoder {
return NewDecoderAdvanced(false, r...)
}
// Decode reads the next record out of the file.
// In the success path, fills 'rec' and returns nil.
// When it fails, it returns err and usually resets 'rec' to the defaults.
// When continueOnCrcError is set, the method may return ErrUnexpectedEOF or ErrCRCMismatch, but preserve the read
// (potentially corrupted) record content.
func (d *decoder) Decode(rec *walpb.Record) error {
rec.Reset()
d.mu.Lock()
defer d.mu.Unlock()
@ -104,15 +126,25 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
return err
}
// skip crc checking if the record type is crcType
if rec.Type != crcType {
d.crc.Write(rec.Data)
if err := rec.Validate(d.crc.Sum32()); err != nil {
if d.isTornEntry(data) {
return io.ErrUnexpectedEOF
}
// skip crc checking if the record type is CrcType
if rec.Type != CrcType {
_, err := d.crc.Write(rec.Data)
if err != nil {
return err
}
if err := rec.Validate(d.crc.Sum32()); err != nil {
if !d.continueOnCrcError {
rec.Reset()
} else {
// If we continue, we want to update lastValidOff, such that following errors are consistent
defer func() { d.lastValidOff += frameSizeBytes + recBytes + padBytes }()
}
if d.isTornEntry(data) {
return fmt.Errorf("%w: in file '%s' at position: %d", io.ErrUnexpectedEOF, fileBufReader.FileInfo().Name(), d.lastValidOff)
}
return fmt.Errorf("%w: in file '%s' at position: %d", err, fileBufReader.FileInfo().Name(), d.lastValidOff)
}
}
// record decoded as valid; point last valid offset to end of record
d.lastValidOff += frameSizeBytes + recBytes + padBytes
@ -167,23 +199,23 @@ func (d *decoder) isTornEntry(data []byte) bool {
return false
}
func (d *decoder) updateCRC(prevCrc uint32) {
func (d *decoder) UpdateCRC(prevCrc uint32) {
d.crc = crc.New(prevCrc, crcTable)
}
func (d *decoder) lastCRC() uint32 {
func (d *decoder) LastCRC() uint32 {
return d.crc.Sum32()
}
func (d *decoder) lastOffset() int64 { return d.lastValidOff }
func (d *decoder) LastOffset() int64 { return d.lastValidOff }
func mustUnmarshalEntry(d []byte) raftpb.Entry {
func MustUnmarshalEntry(d []byte) raftpb.Entry {
var e raftpb.Entry
pbutil.MustUnmarshal(&e, d)
return e
}
func mustUnmarshalState(d []byte) raftpb.HardState {
func MustUnmarshalState(d []byte) raftpb.HardState {
var s raftpb.HardState
pbutil.MustUnmarshal(&s, d)
return s

View File

@ -57,8 +57,8 @@ func TestReadRecord(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
decoder := newDecoder(fileutil.NewFileReader(f))
e := decoder.decode(rec)
decoder := NewDecoder(fileutil.NewFileReader(f))
e := decoder.Decode(rec)
if !reflect.DeepEqual(rec, tt.wr) {
t.Errorf("#%d: block = %v, want %v", i, rec, tt.wr)
}
@ -81,8 +81,8 @@ func TestWriteRecord(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
decoder := newDecoder(fileutil.NewFileReader(f))
err = decoder.decode(b)
decoder := NewDecoder(fileutil.NewFileReader(f))
err = decoder.Decode(b)
if err != nil {
t.Errorf("err = %v, want nil", err)
}

View File

@ -15,6 +15,7 @@
package wal
import (
"errors"
"io"
"os"
"path/filepath"
@ -41,30 +42,30 @@ func Repair(lg *zap.Logger, dirpath string) bool {
lg.Info("repairing", zap.String("path", f.Name()))
rec := &walpb.Record{}
decoder := newDecoder(fileutil.NewFileReader(f.File))
decoder := NewDecoder(fileutil.NewFileReader(f.File))
for {
lastOffset := decoder.lastOffset()
err := decoder.decode(rec)
switch err {
case nil:
lastOffset := decoder.LastOffset()
err := decoder.Decode(rec)
switch {
case err == nil:
// update crc of the decoder when necessary
switch rec.Type {
case crcType:
crc := decoder.crc.Sum32()
case CrcType:
crc := decoder.LastCRC()
// current crc of decoder must match the crc of the record.
// do no need to match 0 crc, since the decoder is a new one at this case.
if crc != 0 && rec.Validate(crc) != nil {
return false
}
decoder.updateCRC(rec.Crc)
decoder.UpdateCRC(rec.Crc)
}
continue
case io.EOF:
case errors.Is(err, io.EOF):
lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
return true
case io.ErrUnexpectedEOF:
case errors.Is(err, io.ErrUnexpectedEOF):
brokenName := f.Name() + ".broken"
bf, bferr := os.Create(brokenName)
if bferr != nil {

View File

@ -20,6 +20,8 @@ import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"go.etcd.io/etcd/server/v3/storage/wal/walpb"
@ -43,86 +45,59 @@ func TestRepairTruncate(t *testing.T) {
}
func testRepair(t *testing.T, ents [][]raftpb.Entry, corrupt corruptFunc, expectedEnts int) {
lg := zaptest.NewLogger(t)
p := t.TempDir()
// create WAL
w, err := Create(zaptest.NewLogger(t), p, nil)
w, err := Create(lg, p, nil)
defer func() {
if err = w.Close(); err != nil {
t.Fatal(err)
}
// The Close might fail.
_ = w.Close()
}()
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
for _, es := range ents {
if err = w.Save(raftpb.HardState{}, es); err != nil {
t.Fatal(err)
}
require.NoError(t, w.Save(raftpb.HardState{}, es))
}
offset, err := w.tail().Seek(0, io.SeekCurrent)
if err != nil {
t.Fatal(err)
}
w.Close()
require.NoError(t, err)
require.NoError(t, w.Close())
err = corrupt(p, offset)
if err != nil {
t.Fatal(err)
}
require.NoError(t, corrupt(p, offset))
// verify we broke the wal
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
_, _, _, err = w.ReadAll()
if err != io.ErrUnexpectedEOF {
t.Fatalf("err = %v, want error %v", err, io.ErrUnexpectedEOF)
}
w.Close()
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
require.NoError(t, w.Close())
// repair the wal
if ok := Repair(zaptest.NewLogger(t), p); !ok {
t.Fatalf("'Repair' returned '%v', want 'true'", ok)
}
require.True(t, Repair(lg, p), "'Repair' returned 'false', want 'true'")
// read it back
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
if err != nil {
t.Fatal(err)
}
w, err = Open(lg, p, walpb.Snapshot{})
require.NoError(t, err)
_, _, walEnts, err := w.ReadAll()
if err != nil {
t.Fatal(err)
}
if len(walEnts) != expectedEnts {
t.Fatalf("len(ents) = %d, want %d", len(walEnts), expectedEnts)
}
require.NoError(t, err)
assert.Len(t, walEnts, expectedEnts)
// write some more entries to repaired log
for i := 1; i <= 10; i++ {
es := []raftpb.Entry{{Index: uint64(expectedEnts + i)}}
if err = w.Save(raftpb.HardState{}, es); err != nil {
t.Fatal(err)
require.NoError(t, w.Save(raftpb.HardState{}, es))
}
}
w.Close()
require.NoError(t, w.Close())
// read back entries following repair, ensure it's all there
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
if err != nil {
t.Fatal(err)
}
w, err = Open(lg, p, walpb.Snapshot{})
require.NoError(t, err)
_, _, walEnts, err = w.ReadAll()
if err != nil {
t.Fatal(err)
}
if len(walEnts) != expectedEnts+10 {
t.Fatalf("len(ents) = %d, want %d", len(walEnts), expectedEnts+10)
}
require.NoError(t, err)
assert.Len(t, walEnts, expectedEnts+10)
}
func makeEnts(ents int) (ret [][]raftpb.Entry) {

View File

@ -36,11 +36,11 @@ import (
)
const (
metadataType int64 = iota + 1
entryType
stateType
crcType
snapshotType
MetadataType int64 = iota + 1
EntryType
StateType
CrcType
SnapshotType
// warnSyncDuration is the amount of time allotted to an fsync before
// logging a warning
@ -56,7 +56,7 @@ var (
ErrMetadataConflict = errors.New("wal: conflicting metadata found")
ErrFileNotFound = errors.New("wal: file not found")
ErrCRCMismatch = errors.New("wal: crc mismatch")
ErrCRCMismatch = walpb.ErrCRCMismatch
ErrSnapshotMismatch = errors.New("wal: snapshot mismatch")
ErrSnapshotNotFound = errors.New("wal: snapshot not found")
ErrSliceOutOfRange = errors.New("wal: slice bounds out of range")
@ -81,8 +81,8 @@ type WAL struct {
state raftpb.HardState // hardstate recorded at the head of WAL
start walpb.Snapshot // snapshot to start reading
decoder *decoder // decoder to decode records
readClose func() error // closer for decode reader
decoder Decoder // decoder to Decode records
readClose func() error // closer for Decode reader
unsafeNoSync bool // if set, do not fsync
@ -166,7 +166,7 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
if err = w.saveCrc(0); err != nil {
return nil, err
}
if err = w.encoder.encode(&walpb.Record{Type: metadataType, Data: metadata}); err != nil {
if err = w.encoder.encode(&walpb.Record{Type: MetadataType, Data: metadata}); err != nil {
return nil, err
}
if err = w.SaveSnapshot(walpb.Snapshot{}); err != nil {
@ -351,7 +351,7 @@ func openAtIndex(lg *zap.Logger, dirpath string, snap walpb.Snapshot, write bool
lg: lg,
dir: dirpath,
start: snap,
decoder: newDecoder(rs...),
decoder: NewDecoder(rs...),
readClose: closer,
locks: ls,
}
@ -452,10 +452,10 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
decoder := w.decoder
var match bool
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
for err = decoder.Decode(rec); err == nil; err = decoder.Decode(rec) {
switch rec.Type {
case entryType:
e := mustUnmarshalEntry(rec.Data)
case EntryType:
e := MustUnmarshalEntry(rec.Data)
// 0 <= e.Index-w.start.Index - 1 < len(ents)
if e.Index > w.start.Index {
// prevent "panic: runtime error: slice bounds out of range [:13038096702221461992] with capacity 0"
@ -469,27 +469,27 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
}
w.enti = e.Index
case stateType:
state = mustUnmarshalState(rec.Data)
case StateType:
state = MustUnmarshalState(rec.Data)
case metadataType:
case MetadataType:
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
state.Reset()
return nil, state, nil, ErrMetadataConflict
}
metadata = rec.Data
case crcType:
crc := decoder.crc.Sum32()
case CrcType:
crc := decoder.LastCRC()
// current crc of decoder must match the crc of the record.
// do no need to match 0 crc, since the decoder is a new one at this case.
if crc != 0 && rec.Validate(crc) != nil {
state.Reset()
return nil, state, nil, ErrCRCMismatch
}
decoder.updateCRC(rec.Crc)
decoder.UpdateCRC(rec.Crc)
case snapshotType:
case SnapshotType:
var snap walpb.Snapshot
pbutil.MustUnmarshal(&snap, rec.Data)
if snap.Index == w.start.Index {
@ -527,7 +527,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
// not all, will cause CRC errors on WAL open. Since the records
// were never fully synced to disk in the first place, it's safe
// to zero them out to avoid any CRC errors from new writes.
if _, err = w.tail().Seek(w.decoder.lastOffset(), io.SeekStart); err != nil {
if _, err = w.tail().Seek(w.decoder.LastOffset(), io.SeekStart); err != nil {
return nil, state, nil, err
}
if err = fileutil.ZeroToEnd(w.tail().File); err != nil {
@ -551,7 +551,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
if w.tail() != nil {
// create encoder (chain crc with the decoder), enable appending
w.encoder, err = newFileEncoder(w.tail().File, w.decoder.lastCRC())
w.encoder, err = newFileEncoder(w.tail().File, w.decoder.LastCRC())
if err != nil {
return
}
@ -587,24 +587,24 @@ func ValidSnapshotEntries(lg *zap.Logger, walDir string) ([]walpb.Snapshot, erro
}()
// create a new decoder from the readers on the WAL files
decoder := newDecoder(rs...)
decoder := NewDecoder(rs...)
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
for err = decoder.Decode(rec); err == nil; err = decoder.Decode(rec) {
switch rec.Type {
case snapshotType:
case SnapshotType:
var loadedSnap walpb.Snapshot
pbutil.MustUnmarshal(&loadedSnap, rec.Data)
snaps = append(snaps, loadedSnap)
case stateType:
state = mustUnmarshalState(rec.Data)
case crcType:
crc := decoder.crc.Sum32()
case StateType:
state = MustUnmarshalState(rec.Data)
case CrcType:
crc := decoder.LastCRC()
// current crc of decoder must match the crc of the record.
// do no need to match 0 crc, since the decoder is a new one at this case.
if crc != 0 && rec.Validate(crc) != nil {
return nil, ErrCRCMismatch
}
decoder.updateCRC(rec.Crc)
decoder.UpdateCRC(rec.Crc)
}
}
// We do not have to read out all the WAL entries
@ -661,24 +661,24 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) (*raftpb.HardSta
}()
// create a new decoder from the readers on the WAL files
decoder := newDecoder(rs...)
decoder := NewDecoder(rs...)
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
for err = decoder.Decode(rec); err == nil; err = decoder.Decode(rec) {
switch rec.Type {
case metadataType:
case MetadataType:
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
return nil, ErrMetadataConflict
}
metadata = rec.Data
case crcType:
crc := decoder.crc.Sum32()
case CrcType:
crc := decoder.LastCRC()
// Current crc of decoder must match the crc of the record.
// We need not match 0 crc, since the decoder is a new one at this point.
if crc != 0 && rec.Validate(crc) != nil {
return nil, ErrCRCMismatch
}
decoder.updateCRC(rec.Crc)
case snapshotType:
decoder.UpdateCRC(rec.Crc)
case SnapshotType:
var loadedSnap walpb.Snapshot
pbutil.MustUnmarshal(&loadedSnap, rec.Data)
if loadedSnap.Index == snap.Index {
@ -689,8 +689,8 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) (*raftpb.HardSta
}
// We ignore all entry and state type records as these
// are not necessary for validating the WAL contents
case entryType:
case stateType:
case EntryType:
case StateType:
pbutil.MustUnmarshal(&state, rec.Data)
default:
return nil, fmt.Errorf("unexpected block type %d", rec.Type)
@ -748,7 +748,7 @@ func (w *WAL) cut() error {
return err
}
if err = w.encoder.encode(&walpb.Record{Type: metadataType, Data: w.metadata}); err != nil {
if err = w.encoder.encode(&walpb.Record{Type: MetadataType, Data: w.metadata}); err != nil {
return err
}
@ -905,7 +905,7 @@ func (w *WAL) Close() error {
func (w *WAL) saveEntry(e *raftpb.Entry) error {
// TODO: add MustMarshalTo to reduce one allocation.
b := pbutil.MustMarshal(e)
rec := &walpb.Record{Type: entryType, Data: b}
rec := &walpb.Record{Type: EntryType, Data: b}
if err := w.encoder.encode(rec); err != nil {
return err
}
@ -919,7 +919,7 @@ func (w *WAL) saveState(s *raftpb.HardState) error {
}
w.state = *s
b := pbutil.MustMarshal(s)
rec := &walpb.Record{Type: stateType, Data: b}
rec := &walpb.Record{Type: StateType, Data: b}
return w.encoder.encode(rec)
}
@ -968,7 +968,7 @@ func (w *WAL) SaveSnapshot(e walpb.Snapshot) error {
w.mu.Lock()
defer w.mu.Unlock()
rec := &walpb.Record{Type: snapshotType, Data: b}
rec := &walpb.Record{Type: SnapshotType, Data: b}
if err := w.encoder.encode(rec); err != nil {
return err
}
@ -980,7 +980,7 @@ func (w *WAL) SaveSnapshot(e walpb.Snapshot) error {
}
func (w *WAL) saveCrc(prevCrc uint32) error {
return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
return w.encoder.encode(&walpb.Record{Type: CrcType, Crc: prevCrc})
}
func (w *WAL) tail() *fileutil.LockedFile {

View File

@ -74,16 +74,16 @@ func TestNew(t *testing.T) {
var wb bytes.Buffer
e := newEncoder(&wb, 0, 0)
err = e.encode(&walpb.Record{Type: crcType, Crc: 0})
err = e.encode(&walpb.Record{Type: CrcType, Crc: 0})
if err != nil {
t.Fatalf("err = %v, want nil", err)
}
err = e.encode(&walpb.Record{Type: metadataType, Data: []byte("somedata")})
err = e.encode(&walpb.Record{Type: MetadataType, Data: []byte("somedata")})
if err != nil {
t.Fatalf("err = %v, want nil", err)
}
r := &walpb.Record{
Type: snapshotType,
Type: SnapshotType,
Data: pbutil.MustMarshal(&walpb.Snapshot{}),
}
if err = e.encode(r); err != nil {
@ -302,7 +302,7 @@ func TestCut(t *testing.T) {
}
defer f.Close()
nw := &WAL{
decoder: newDecoder(fileutil.NewFileReader(f)),
decoder: NewDecoder(fileutil.NewFileReader(f)),
start: snap,
}
_, gst, _, err := nw.ReadAll()

View File

@ -14,7 +14,10 @@
package walpb
import "errors"
import (
"errors"
"fmt"
)
var (
ErrCRCMismatch = errors.New("walpb: crc mismatch")
@ -24,8 +27,7 @@ func (rec *Record) Validate(crc uint32) error {
if rec.Crc == crc {
return nil
}
rec.Reset()
return ErrCRCMismatch
return fmt.Errorf("%w: expected: %x computed: %x", ErrCRCMismatch, rec.Crc, crc)
}
// ValidateSnapshotForWrite ensures the Snapshot the newly written snapshot is valid.

View File

@ -52,38 +52,7 @@ func TestEtcdDumpLogEntryType(t *testing.T) {
p := t.TempDir()
memberdir := filepath.Join(p, "member")
err = os.Mkdir(memberdir, 0744)
if err != nil {
t.Fatal(err)
}
waldir := walDir(p)
snapdir := snapDir(p)
w, err := wal.Create(zaptest.NewLogger(t), waldir, nil)
if err != nil {
t.Fatal(err)
}
err = os.Mkdir(snapdir, 0744)
if err != nil {
t.Fatal(err)
}
ents := make([]raftpb.Entry, 0)
// append entries into wal log
appendConfigChangeEnts(&ents)
appendNormalRequestEnts(&ents)
appendNormalIRREnts(&ents)
appendUnknownNormalEnts(&ents)
// force commit newly appended entries
err = w.Save(raftpb.HardState{}, ents)
if err != nil {
t.Fatal(err)
}
w.Close()
mustCreateWalLog(t, p)
argtests := []struct {
name string
@ -128,6 +97,41 @@ func TestEtcdDumpLogEntryType(t *testing.T) {
}
func mustCreateWalLog(t *testing.T, path string) {
memberdir := filepath.Join(path, "member")
err := os.Mkdir(memberdir, 0744)
if err != nil {
t.Fatal(err)
}
waldir := walDir(path)
snapdir := snapDir(path)
w, err := wal.Create(zaptest.NewLogger(t), waldir, nil)
if err != nil {
t.Fatal(err)
}
err = os.Mkdir(snapdir, 0744)
if err != nil {
t.Fatal(err)
}
ents := make([]raftpb.Entry, 0)
// append entries into wal log
appendConfigChangeEnts(&ents)
appendNormalRequestEnts(&ents)
appendNormalIRREnts(&ents)
appendUnknownNormalEnts(&ents)
// force commit newly appended entries
err = w.Save(raftpb.HardState{}, ents)
if err != nil {
t.Fatal(err)
}
w.Close()
}
func appendConfigChangeEnts(ents *[]raftpb.Entry) {
configChangeData := []raftpb.ConfChange{
{ID: 1, Type: raftpb.ConfChangeAddNode, NodeID: 2, Context: []byte("")},

View File

@ -56,8 +56,10 @@ IRRCompaction, IRRLeaseGrant, IRRLeaseRevoke, IRRLeaseCheckpoint`)
streamdecoder := flag.String("stream-decoder", "", `The name of an executable decoding tool, the executable must process
hex encoded lines of binary input (from etcd-dump-logs)
and output a hex encoded line of binary for each input line`)
raw := flag.Bool("raw", false, "Read the logs in the low-level form")
flag.Parse()
lg := zap.NewExample()
if len(flag.Args()) != 1 {
log.Fatalf("Must provide data-dir argument (got %+v)", flag.Args())
@ -68,6 +70,37 @@ and output a hex encoded line of binary for each input line`)
log.Fatal("start-snap and start-index flags cannot be used together.")
}
if !*raw {
ents := readUsingReadAll(lg, index, snapfile, dataDir, waldir)
fmt.Printf("WAL entries: %d\n", len(ents))
if len(ents) > 0 {
fmt.Printf("lastIndex=%d\n", ents[len(ents)-1].Index)
}
fmt.Printf("%4s\t%10s\ttype\tdata", "term", "index")
if *streamdecoder != "" {
fmt.Print("\tdecoder_status\tdecoded_data")
}
fmt.Println()
listEntriesType(*entrytype, *streamdecoder, ents)
} else {
if *snapfile != "" ||
*entrytype != defaultEntryTypes ||
*streamdecoder != "" {
log.Fatalf("Flags --entry-type, --stream-decoder, --entrytype not supported in the RAW mode.")
}
wd := *waldir
if wd == "" {
wd = walDir(dataDir)
}
readRaw(index, wd, os.Stdout)
}
}
func readUsingReadAll(lg *zap.Logger, index *uint64, snapfile *string, dataDir string, waldir *string) []raftpb.Entry {
var (
walsnap walpb.Snapshot
snapshot *raftpb.Snapshot
@ -81,10 +114,10 @@ and output a hex encoded line of binary for each input line`)
walsnap.Index = *index
} else {
if *snapfile == "" {
ss := snap.New(zap.NewExample(), snapDir(dataDir))
ss := snap.New(lg, snapDir(dataDir))
snapshot, err = ss.Load()
} else {
snapshot, err = snap.Read(zap.NewExample(), filepath.Join(snapDir(dataDir), *snapfile))
snapshot, err = snap.Read(lg, filepath.Join(snapDir(dataDir), *snapfile))
}
switch err {
@ -123,19 +156,7 @@ and output a hex encoded line of binary for each input line`)
vid := types.ID(state.Vote)
fmt.Printf("WAL metadata:\nnodeID=%s clusterID=%s term=%d commitIndex=%d vote=%s\n",
id, cid, state.Term, state.Commit, vid)
fmt.Printf("WAL entries: %d\n", len(ents))
if len(ents) > 0 {
fmt.Printf("lastIndex=%d\n", ents[len(ents)-1].Index)
}
fmt.Printf("%4s\t%10s\ttype\tdata", "term", "index")
if *streamdecoder != "" {
fmt.Print("\tdecoder_status\tdecoded_data")
}
fmt.Println()
listEntriesType(*entrytype, *streamdecoder, ents)
return ents
}
func walDir(dataDir string) string { return filepath.Join(dataDir, "member", "wal") }

107
tools/etcd-dump-logs/raw.go Normal file
View File

@ -0,0 +1,107 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"go.etcd.io/etcd/api/v3/etcdserverpb"
"go.etcd.io/etcd/client/pkg/v3/fileutil"
"go.etcd.io/etcd/pkg/v3/pbutil"
"go.etcd.io/etcd/server/v3/storage/wal"
"go.etcd.io/etcd/server/v3/storage/wal/walpb"
"go.etcd.io/raft/v3/raftpb"
)
func readRaw(fromIndex *uint64, waldir string, out io.Writer) {
var walReaders []fileutil.FileReader
files, err := ioutil.ReadDir(waldir)
if err != nil {
log.Fatalf("Error: Failed to read directory '%s' error:%v", waldir, err)
}
for _, finfo := range files {
if filepath.Ext(finfo.Name()) != ".wal" {
log.Printf("Warning: Ignoring not .wal file: %s", finfo.Name())
continue
}
f, err := os.Open(filepath.Join(waldir, finfo.Name()))
if err != nil {
log.Printf("Error: Failed to read file: %s . error:%v", finfo.Name(), err)
}
walReaders = append(walReaders, fileutil.NewFileReader(f))
}
decoder := wal.NewDecoderAdvanced(true, walReaders...)
// The variable is used to not pollute log with multiple continuous crc errors.
crcDesync := false
for {
rec := walpb.Record{}
err := decoder.Decode(&rec)
if err == nil || errors.Is(err, walpb.ErrCRCMismatch) {
if err != nil && !crcDesync {
log.Printf("Error: Reading entry failed with CRC error: %c", err)
crcDesync = true
}
printRec(&rec, fromIndex, out)
if rec.Type == wal.CrcType {
decoder.UpdateCRC(rec.Crc)
crcDesync = false
}
continue
}
if errors.Is(err, io.EOF) {
fmt.Fprintf(out, "EOF: All entries were processed.\n")
break
} else {
log.Printf("Error: Reading failed: %v", err)
break
}
}
}
func printRec(rec *walpb.Record, fromIndex *uint64, out io.Writer) {
switch rec.Type {
case wal.MetadataType:
var metadata etcdserverpb.Metadata
pbutil.MustUnmarshal(&metadata, rec.Data)
fmt.Fprintf(out, "Metadata: %s\n", metadata.String())
case wal.CrcType:
fmt.Fprintf(out, "CRC: %d\n", rec.Crc)
case wal.EntryType:
e := wal.MustUnmarshalEntry(rec.Data)
if fromIndex == nil || e.Index >= *fromIndex {
fmt.Fprintf(out, "Entry: %s\n", e.String())
}
case wal.SnapshotType:
var snap walpb.Snapshot
pbutil.MustUnmarshal(&snap, rec.Data)
if fromIndex == nil || snap.Index >= *fromIndex {
fmt.Fprintf(out, "Snapshot: %s\n", snap.String())
}
case wal.StateType:
var state raftpb.HardState
pbutil.MustUnmarshal(&state, rec.Data)
if fromIndex == nil || state.Commit >= *fromIndex {
fmt.Fprintf(out, "HardState: %s\n", state.String())
}
default:
log.Printf("Unexpected WAL log type: %d", rec.Type)
}
}

View File

@ -0,0 +1,68 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
)
func Test_readRaw(t *testing.T) {
path := t.TempDir()
mustCreateWalLog(t, path)
var out bytes.Buffer
readRaw(nil, walDir(path), &out)
assert.Equal(t,
`CRC: 0
Metadata:
Snapshot:
Entry: Term:1 Index:1 Type:EntryConfChange Data:"\010\001\020\000\030\002\"\000"
Entry: Term:2 Index:2 Type:EntryConfChange Data:"\010\002\020\001\030\002\"\000"
Entry: Term:2 Index:3 Type:EntryConfChange Data:"\010\003\020\002\030\002\"\000"
Entry: Term:2 Index:4 Type:EntryConfChange Data:"\010\004\020\003\030\003\"\000"
Entry: Term:3 Index:5 Data:"\010\000\022\000\032\006/path0\"\030{\"hey\":\"ho\",\"hi\":[\"yo\"]}(\0012\0008\000@\000H\tP\000X\001`+"`"+`\000h\000p\000x\001\200\001\000\210\001\000"
Entry: Term:3 Index:6 Data:"\010\001\022\004QGET\032\006/path1\"\023{\"0\":\"1\",\"2\":[\"3\"]}(\0002\0008\000@\000H\tP\000X\001`+"`"+`\000h\000p\000x\001\200\001\000\210\001\000"
Entry: Term:3 Index:7 Data:"\010\002\022\004SYNC\032\006/path2\"\023{\"0\":\"1\",\"2\":[\"3\"]}(\0002\0008\000@\000H\002P\000X\001`+"`"+`\000h\000p\000x\001\200\001\000\210\001\000"
Entry: Term:3 Index:8 Data:"\010\003\022\006DELETE\032\006/path3\"\030{\"hey\":\"ho\",\"hi\":[\"yo\"]}(\0002\0008\000@\001H\002P\000X\001`+"`"+`\000h\000p\000x\001\200\001\000\210\001\000"
Entry: Term:3 Index:9 Data:"\010\004\022\006RANDOM\032\246\001/path4/superlong/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path/path\"\030{\"hey\":\"ho\",\"hi\":[\"yo\"]}(\0002\0008\000@\000H\002P\000X\001`+"`"+`\000h\000p\000x\001\200\001\000\210\001\000"
Entry: Term:4 Index:10 Data:"\010\005\032\025\n\0011\022\002hi\030\006 \001(\001X\240\234\001h\240\234\001"
Entry: Term:5 Index:11 Data:"\010\006\"\020\n\004foo1\022\004bar1\030\0010\001"
Entry: Term:6 Index:12 Data:"\010\007*\010\n\0010\022\0019\030\001"
Entry: Term:7 Index:13 Data:"\010\0102\024\022\010\032\006\n\001a\022\001b\032\010\032\006\n\001a\022\001b"
Entry: Term:8 Index:14 Data:"\010\t:\002\020\001"
Entry: Term:9 Index:15 Data:"\010\nB\004\010\001\020\001"
Entry: Term:10 Index:16 Data:"\010\013J\002\010\002"
Entry: Term:11 Index:17 Data:"\010\014R\006\010\003\020\004\030\005"
Entry: Term:12 Index:18 Data:"\010\r\302>\000"
Entry: Term:13 Index:19 Data:"\010\016\232?\000"
Entry: Term:14 Index:20 Data:"\010\017\242?\031\n\006myname\022\010password\032\005token"
Entry: Term:15 Index:21 Data:"\010\020\342D\020\n\005name1\022\005pass1\032\000"
Entry: Term:16 Index:22 Data:"\010\021\352D\007\n\005name1"
Entry: Term:17 Index:23 Data:"\010\022\362D\007\n\005name1"
Entry: Term:18 Index:24 Data:"\010\023\372D\016\n\005name1\022\005pass2"
Entry: Term:19 Index:25 Data:"\010\024\202E\016\n\005user1\022\005role1"
Entry: Term:20 Index:26 Data:"\010\025\212E\016\n\005user2\022\005role2"
Entry: Term:21 Index:27 Data:"\010\026\222E\000"
Entry: Term:22 Index:28 Data:"\010\027\232E\000"
Entry: Term:23 Index:29 Data:"\010\030\202K\007\n\005role2"
Entry: Term:24 Index:30 Data:"\010\031\212K\007\n\005role1"
Entry: Term:25 Index:31 Data:"\010\032\222K\007\n\005role3"
Entry: Term:26 Index:32 Data:"\010\033\232K\033\n\005role3\022\022\010\001\022\004Keys\032\010RangeEnd"
Entry: Term:27 Index:33 Data:"\010\034\242K\026\n\005role3\022\003key\032\010rangeend"
Entry: Term:27 Index:34 Data:"?"
EOF: All entries were processed.
`, out.String())
}