mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
wal decoding: Add optional mode to ignore CRC errors.
Signed-off-by: Piotr Tabor <ptab@google.com>
This commit is contained in:
parent
498619bdda
commit
bee2a08968
@ -40,6 +40,11 @@ type decoder struct {
|
|||||||
// lastValidOff is the file offset following the last valid decoded record
|
// lastValidOff is the file offset following the last valid decoded record
|
||||||
lastValidOff int64
|
lastValidOff int64
|
||||||
crc hash.Hash32
|
crc hash.Hash32
|
||||||
|
|
||||||
|
// continueOnCrcError - causes the decoder to continue working even in case of crc mismatch.
|
||||||
|
// This is a desired mode for tools performing inspection of the corrupted WAL logs.
|
||||||
|
// See comments on 'decode' method for semantics.
|
||||||
|
continueOnCrcError bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDecoder(r ...fileutil.FileReader) *decoder {
|
func newDecoder(r ...fileutil.FileReader) *decoder {
|
||||||
@ -53,6 +58,11 @@ func newDecoder(r ...fileutil.FileReader) *decoder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decode reads the next record out of the file.
|
||||||
|
// In the success path, fills 'rec' and returns nil.
|
||||||
|
// When it fails, it returns err and usually resets 'rec' to the defaults.
|
||||||
|
// When continueOnCrcError is set, the method may return ErrUnexpectedEOF or ErrCRCMismatch, but it preserves the read
|
||||||
|
// (potentially corrupted) record content.
|
||||||
func (d *decoder) decode(rec *walpb.Record) error {
|
func (d *decoder) decode(rec *walpb.Record) error {
|
||||||
rec.Reset()
|
rec.Reset()
|
||||||
d.mu.Lock()
|
d.mu.Lock()
|
||||||
@ -108,6 +118,13 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
|
|||||||
if rec.Type != crcType {
|
if rec.Type != crcType {
|
||||||
d.crc.Write(rec.Data)
|
d.crc.Write(rec.Data)
|
||||||
if err := rec.Validate(d.crc.Sum32()); err != nil {
|
if err := rec.Validate(d.crc.Sum32()); err != nil {
|
||||||
|
if !d.continueOnCrcError {
|
||||||
|
rec.Reset()
|
||||||
|
} else {
|
||||||
|
// If we continue, we want to update lastValidOff, such that following errors are consistent
|
||||||
|
defer func() { d.lastValidOff += frameSizeBytes + recBytes + padBytes }()
|
||||||
|
}
|
||||||
|
|
||||||
if d.isTornEntry(data) {
|
if d.isTornEntry(data) {
|
||||||
return fmt.Errorf("%w: in file '%s' at position: %d", io.ErrUnexpectedEOF, fileBufReader.FileInfo().Name(), d.lastValidOff)
|
return fmt.Errorf("%w: in file '%s' at position: %d", io.ErrUnexpectedEOF, fileBufReader.FileInfo().Name(), d.lastValidOff)
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
package wal
|
package wal
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -45,8 +46,8 @@ func Repair(lg *zap.Logger, dirpath string) bool {
|
|||||||
for {
|
for {
|
||||||
lastOffset := decoder.lastOffset()
|
lastOffset := decoder.lastOffset()
|
||||||
err := decoder.decode(rec)
|
err := decoder.decode(rec)
|
||||||
switch err {
|
switch {
|
||||||
case nil:
|
case err == nil:
|
||||||
// update crc of the decoder when necessary
|
// update crc of the decoder when necessary
|
||||||
switch rec.Type {
|
switch rec.Type {
|
||||||
case crcType:
|
case crcType:
|
||||||
@ -60,11 +61,11 @@ func Repair(lg *zap.Logger, dirpath string) bool {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
|
|
||||||
case io.EOF:
|
case errors.Is(err, io.EOF):
|
||||||
lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
|
lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
|
||||||
return true
|
return true
|
||||||
|
|
||||||
case io.ErrUnexpectedEOF:
|
case errors.Is(err, io.ErrUnexpectedEOF):
|
||||||
brokenName := f.Name() + ".broken"
|
brokenName := f.Name() + ".broken"
|
||||||
bf, bferr := os.Create(brokenName)
|
bf, bferr := os.Create(brokenName)
|
||||||
if bferr != nil {
|
if bferr != nil {
|
||||||
|
@ -16,6 +16,8 @@ package wal
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
@ -43,86 +45,59 @@ func TestRepairTruncate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func testRepair(t *testing.T, ents [][]raftpb.Entry, corrupt corruptFunc, expectedEnts int) {
|
func testRepair(t *testing.T, ents [][]raftpb.Entry, corrupt corruptFunc, expectedEnts int) {
|
||||||
|
lg := zaptest.NewLogger(t)
|
||||||
p := t.TempDir()
|
p := t.TempDir()
|
||||||
|
|
||||||
// create WAL
|
// create WAL
|
||||||
w, err := Create(zaptest.NewLogger(t), p, nil)
|
w, err := Create(lg, p, nil)
|
||||||
defer func() {
|
defer func() {
|
||||||
if err = w.Close(); err != nil {
|
// The Close might fail.
|
||||||
t.Fatal(err)
|
_ = w.Close()
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, es := range ents {
|
for _, es := range ents {
|
||||||
if err = w.Save(raftpb.HardState{}, es); err != nil {
|
assert.NoError(t, w.Save(raftpb.HardState{}, es))
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
offset, err := w.tail().Seek(0, io.SeekCurrent)
|
offset, err := w.tail().Seek(0, io.SeekCurrent)
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
require.NoError(t, w.Close())
|
||||||
}
|
|
||||||
w.Close()
|
|
||||||
|
|
||||||
err = corrupt(p, offset)
|
require.NoError(t, corrupt(p, offset))
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// verify we broke the wal
|
// verify we broke the wal
|
||||||
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
|
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
_, _, _, err = w.ReadAll()
|
_, _, _, err = w.ReadAll()
|
||||||
if err != io.ErrUnexpectedEOF {
|
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
|
||||||
t.Fatalf("err = %v, want error %v", err, io.ErrUnexpectedEOF)
|
require.NoError(t, w.Close())
|
||||||
}
|
|
||||||
w.Close()
|
|
||||||
|
|
||||||
// repair the wal
|
// repair the wal
|
||||||
if ok := Repair(zaptest.NewLogger(t), p); !ok {
|
require.True(t, Repair(lg, p), "'Repair' returned 'false', want 'true'")
|
||||||
t.Fatalf("'Repair' returned '%v', want 'true'", ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
// read it back
|
// read it back
|
||||||
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
|
w, err = Open(lg, p, walpb.Snapshot{})
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
_, _, walEnts, err := w.ReadAll()
|
_, _, walEnts, err := w.ReadAll()
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
assert.Len(t, walEnts, expectedEnts)
|
||||||
}
|
|
||||||
if len(walEnts) != expectedEnts {
|
|
||||||
t.Fatalf("len(ents) = %d, want %d", len(walEnts), expectedEnts)
|
|
||||||
}
|
|
||||||
|
|
||||||
// write some more entries to repaired log
|
// write some more entries to repaired log
|
||||||
for i := 1; i <= 10; i++ {
|
for i := 1; i <= 10; i++ {
|
||||||
es := []raftpb.Entry{{Index: uint64(expectedEnts + i)}}
|
es := []raftpb.Entry{{Index: uint64(expectedEnts + i)}}
|
||||||
if err = w.Save(raftpb.HardState{}, es); err != nil {
|
require.NoError(t, w.Save(raftpb.HardState{}, es))
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
w.Close()
|
require.NoError(t, w.Close())
|
||||||
|
|
||||||
// read back entries following repair, ensure it's all there
|
// read back entries following repair, ensure it's all there
|
||||||
w, err = Open(zaptest.NewLogger(t), p, walpb.Snapshot{})
|
w, err = Open(lg, p, walpb.Snapshot{})
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
_, _, walEnts, err = w.ReadAll()
|
_, _, walEnts, err = w.ReadAll()
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Fatal(err)
|
assert.Len(t, walEnts, expectedEnts+10)
|
||||||
}
|
|
||||||
if len(walEnts) != expectedEnts+10 {
|
|
||||||
t.Fatalf("len(ents) = %d, want %d", len(walEnts), expectedEnts+10)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeEnts(ents int) (ret [][]raftpb.Entry) {
|
func makeEnts(ents int) (ret [][]raftpb.Entry) {
|
||||||
|
@ -14,7 +14,10 @@
|
|||||||
|
|
||||||
package walpb
|
package walpb
|
||||||
|
|
||||||
import "errors"
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ErrCRCMismatch = errors.New("walpb: crc mismatch")
|
ErrCRCMismatch = errors.New("walpb: crc mismatch")
|
||||||
@ -24,8 +27,7 @@ func (rec *Record) Validate(crc uint32) error {
|
|||||||
if rec.Crc == crc {
|
if rec.Crc == crc {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
rec.Reset()
|
return fmt.Errorf("%w: expected: %x computed: %x", ErrCRCMismatch, rec.Crc, crc)
|
||||||
return ErrCRCMismatch
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ValidateSnapshotForWrite ensures the newly written snapshot is valid.
|
// ValidateSnapshotForWrite ensures the newly written snapshot is valid.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user