Merge pull request #11924 from tangcong/fix-crc-mismatch

wal: fix crc mismatch crash bug
This commit is contained in:
Gyuho Lee 2020-05-20 10:30:40 -07:00 committed by GitHub
commit 188c66ada8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 0 deletions

View File

@ -119,6 +119,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Previously, server restore fails if it had crashed after persisting raft hard state but before saving snapshot. - Previously, server restore fails if it had crashed after persisting raft hard state but before saving snapshot.
- See https://github.com/etcd-io/etcd/issues/10219 for more. - See https://github.com/etcd-io/etcd/issues/10219 for more.
- Improve logging around snapshot send and receive. - Improve logging around snapshot send and receive.
- Add [missing CRC checksum check in WAL validate method, which otherwise causes a panic](https://github.com/etcd-io/etcd/pull/11924).
### Package `embed` ### Package `embed`

View File

@ -567,6 +567,14 @@ func ValidSnapshotEntries(lg *zap.Logger, walDir string) ([]walpb.Snapshot, erro
snaps = append(snaps, loadedSnap) snaps = append(snaps, loadedSnap)
case stateType: case stateType:
state = mustUnmarshalState(rec.Data) state = mustUnmarshalState(rec.Data)
case crcType:
crc := decoder.crc.Sum32()
// current crc of decoder must match the crc of the record.
// no need to match a 0 crc, since the decoder is a new one in this case.
if crc != 0 && rec.Validate(crc) != nil {
return nil, ErrCRCMismatch
}
decoder.updateCRC(rec.Crc)
} }
} }
// We do not have to read out all the WAL entries // We do not have to read out all the WAL entries

View File

@ -1052,3 +1052,60 @@ func TestValidSnapshotEntries(t *testing.T) {
t.Errorf("expected walSnaps %+v, got %+v", expected, walSnaps) t.Errorf("expected walSnaps %+v, got %+v", expected, walSnaps)
} }
} }
// TestValidSnapshotEntriesAfterPurgeWal ensures that ValidSnapshotEntries
// still succeeds when the WAL consists of many files and the first WAL file
// has been removed.
func TestValidSnapshotEntriesAfterPurgeWal(t *testing.T) {
	// Use a tiny segment size for the duration of the test (the test is meant
	// to run against many WAL files); restore the original value afterwards.
	oldSegmentSizeBytes := SegmentSizeBytes
	SegmentSizeBytes = 64
	defer func() {
		SegmentSizeBytes = oldSegmentSizeBytes
	}()

	p, err := ioutil.TempDir(os.TempDir(), "waltest")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(p)

	snap0 := walpb.Snapshot{Index: 0, Term: 0}
	snap1 := walpb.Snapshot{Index: 1, Term: 1}
	state1 := raftpb.HardState{Commit: 1, Term: 1}
	snap2 := walpb.Snapshot{Index: 2, Term: 1}
	snap3 := walpb.Snapshot{Index: 3, Term: 2}
	state2 := raftpb.HardState{Commit: 3, Term: 2}

	// Write the WAL inside a closure so that the deferred Close runs before
	// the files are inspected and removed below.
	func() {
		w, err := Create(zap.NewExample(), p, nil)
		if err != nil {
			t.Fatal(err)
		}
		defer w.Close()

		// snap0 is implicitly created at index 0, term 0
		if err = w.SaveSnapshot(snap1); err != nil {
			t.Fatal(err)
		}
		if err = w.Save(state1, nil); err != nil {
			t.Fatal(err)
		}
		if err = w.SaveSnapshot(snap2); err != nil {
			t.Fatal(err)
		}
		if err = w.SaveSnapshot(snap3); err != nil {
			t.Fatal(err)
		}
		// Save the same hard state repeatedly to add many more records.
		for i := 0; i < 128; i++ {
			if err = w.Save(state2, nil); err != nil {
				t.Fatal(err)
			}
		}
	}()

	files, _, err := selectWALFiles(nil, p, snap0)
	if err != nil {
		t.Fatal(err)
	}

	// Remove the first WAL file. Fail loudly if the removal does not succeed;
	// otherwise the test would pass without exercising the purged scenario.
	if err = os.Remove(p + "/" + files[0]); err != nil {
		t.Fatal(err)
	}

	_, err = ValidSnapshotEntries(zap.NewExample(), p)
	if err != nil {
		t.Fatal(err)
	}
}