etcd/server/storage/wal/repair.go
Piotr Tabor 58681d3feb Expose types of entries within the WAL log for access from the tools.
Signed-off-by: Piotr Tabor <ptab@google.com>
2022-12-30 09:19:23 +01:00

118 lines
3.2 KiB
Go

// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package wal
import (
"errors"
"io"
"os"
"path/filepath"
"time"
"go.uber.org/zap"
"go.etcd.io/etcd/client/pkg/v3/fileutil"
"go.etcd.io/etcd/server/v3/storage/wal/walpb"
)
// Repair tries to repair ErrUnexpectedEOF in the
// last wal file by truncating.
//
// It opens the last WAL file under dirpath, decodes its records one after
// another and reacts to the first decode error:
//   - io.EOF: every record decoded cleanly; the file is left untouched and
//     Repair returns true.
//   - io.ErrUnexpectedEOF: the whole file is copied to "<name>.broken" and
//     that copy is flushed to disk; the original is then truncated to the
//     decoder offset captured just before the failing Decode call and
//     fsynced. Repair returns true.
//   - any other error: Repair returns false.
//
// Repair also returns false when the last file cannot be opened, when a CRC
// record fails validation, or when any step of the backup/truncate/fsync
// sequence fails. Every failure path is reported through lg; a nil lg is
// replaced by a no-op logger.
func Repair(lg *zap.Logger, dirpath string) bool {
	if lg == nil {
		lg = zap.NewNop()
	}

	f, err := openLast(lg, dirpath)
	if err != nil {
		// Report why the repair could not even start instead of failing silently.
		lg.Warn("failed to open the last WAL file", zap.String("dir", dirpath), zap.Error(err))
		return false
	}
	defer f.Close()

	lg.Info("repairing", zap.String("path", f.Name()))

	rec := &walpb.Record{}
	decoder := NewDecoder(fileutil.NewFileReader(f.File))
	for {
		// Remember where the decoder stood before this record so the file can
		// be cut back to that point if the record turns out to be incomplete.
		lastOffset := decoder.LastOffset()
		err := decoder.Decode(rec)
		switch {
		case err == nil:
			// update crc of the decoder when necessary
			if rec.Type == CrcType {
				crc := decoder.LastCRC()
				// current crc of decoder must match the crc of the record.
				// no need to match a 0 crc, since the decoder is a new one in this case.
				if crc != 0 {
					if verr := rec.Validate(crc); verr != nil {
						lg.Warn("failed to validate crc record", zap.String("path", f.Name()), zap.Error(verr))
						return false
					}
				}
				decoder.UpdateCRC(rec.Crc)
			}

		case errors.Is(err, io.EOF):
			lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
			return true

		case errors.Is(err, io.ErrUnexpectedEOF):
			// Keep a full copy of the damaged file before modifying it.
			brokenName := f.Name() + ".broken"
			bf, bferr := os.Create(brokenName)
			if bferr != nil {
				lg.Warn("failed to create backup file", zap.String("path", brokenName), zap.Error(bferr))
				return false
			}
			// Every path below returns, so this defer does not pile up across iterations.
			defer bf.Close()

			if _, err = f.Seek(0, io.SeekStart); err != nil {
				lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err))
				return false
			}

			if _, err = io.Copy(bf, f); err != nil {
				lg.Warn("failed to copy", zap.String("from", f.Name()), zap.String("to", brokenName), zap.Error(err))
				return false
			}

			// Flush the backup before the destructive truncate so that a crash
			// in between cannot lose the only copy of the bytes being removed.
			if err = bf.Sync(); err != nil {
				lg.Warn("failed to fsync backup file", zap.String("path", brokenName), zap.Error(err))
				return false
			}

			if err = f.Truncate(lastOffset); err != nil {
				lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err))
				return false
			}

			start := time.Now()
			if err = fileutil.Fsync(f.File); err != nil {
				lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err))
				return false
			}
			walFsyncSec.Observe(time.Since(start).Seconds())

			lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF))
			return true

		default:
			lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err))
			return false
		}
	}
}
// openLast locks and returns the last WAL file in dirpath, opened for both
// reading and writing.
//
// "Last" means the final element of the name list returned by readWALNames;
// any error from readWALNames is returned unchanged.
// NOTE(review): indexing assumes readWALNames never returns an empty list
// together with a nil error — confirm against its implementation.
func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) {
	walNames, err := readWALNames(lg, dirpath)
	if err != nil {
		return nil, err
	}

	lastName := walNames[len(walNames)-1]
	lastPath := filepath.Join(dirpath, lastName)
	return fileutil.LockFile(lastPath, os.O_RDWR, fileutil.PrivateFileMode)
}