mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
process the scenaro of the last WAL record being partially synced to disk
We need to return io.ErrUnexpectedEOF in the error chain, so that etcdserver can repair it automatically. Signed-off-by: Benjamin Wang <wachao@vmware.com>
This commit is contained in:
parent
6200b22f79
commit
bd9f1584d4
@ -16,6 +16,7 @@ package etcdserver
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -27,8 +28,6 @@ import (
|
|||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"go.etcd.io/etcd/server/v3/etcdserver/errors"
|
|
||||||
|
|
||||||
"go.etcd.io/etcd/api/v3/etcdserverpb"
|
"go.etcd.io/etcd/api/v3/etcdserverpb"
|
||||||
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
||||||
"go.etcd.io/etcd/client/pkg/v3/types"
|
"go.etcd.io/etcd/client/pkg/v3/types"
|
||||||
@ -42,6 +41,7 @@ import (
|
|||||||
"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
|
"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
|
||||||
"go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery"
|
"go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery"
|
||||||
"go.etcd.io/etcd/server/v3/etcdserver/cindex"
|
"go.etcd.io/etcd/server/v3/etcdserver/cindex"
|
||||||
|
servererrors "go.etcd.io/etcd/server/v3/etcdserver/errors"
|
||||||
serverstorage "go.etcd.io/etcd/server/v3/storage"
|
serverstorage "go.etcd.io/etcd/server/v3/storage"
|
||||||
"go.etcd.io/etcd/server/v3/storage/backend"
|
"go.etcd.io/etcd/server/v3/storage/backend"
|
||||||
"go.etcd.io/etcd/server/v3/storage/schema"
|
"go.etcd.io/etcd/server/v3/storage/schema"
|
||||||
@ -339,7 +339,7 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (*
|
|||||||
str, err = v3discovery.JoinCluster(cfg.Logger, &cfg.DiscoveryCfg, m.ID, cfg.InitialPeerURLsMap.String())
|
str, err = v3discovery.JoinCluster(cfg.Logger, &cfg.DiscoveryCfg, m.ID, cfg.InitialPeerURLsMap.String())
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, &errors.DiscoveryError{Op: "join", Err: err}
|
return nil, &servererrors.DiscoveryError{Op: "join", Err: err}
|
||||||
}
|
}
|
||||||
var urlsmap types.URLsMap
|
var urlsmap types.URLsMap
|
||||||
urlsmap, err = types.NewURLsMap(str)
|
urlsmap, err = types.NewURLsMap(str)
|
||||||
@ -614,7 +614,7 @@ func openWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (*w
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
w.Close()
|
w.Close()
|
||||||
// we can only repair ErrUnexpectedEOF and we never repair twice.
|
// we can only repair ErrUnexpectedEOF and we never repair twice.
|
||||||
if repaired || err != io.ErrUnexpectedEOF {
|
if repaired || !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
cfg.Logger.Fatal("failed to read WAL, cannot be repaired", zap.Error(err))
|
cfg.Logger.Fatal("failed to read WAL, cannot be repaired", zap.Error(err))
|
||||||
}
|
}
|
||||||
if !wal.Repair(cfg.Logger, cfg.WALDir()) {
|
if !wal.Repair(cfg.Logger, cfg.WALDir()) {
|
||||||
|
@ -106,8 +106,8 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
|
|||||||
// The length of current WAL entry must be less than the remaining file size.
|
// The length of current WAL entry must be less than the remaining file size.
|
||||||
maxEntryLimit := fileBufReader.FileInfo().Size() - d.lastValidOff - padBytes
|
maxEntryLimit := fileBufReader.FileInfo().Size() - d.lastValidOff - padBytes
|
||||||
if recBytes > maxEntryLimit {
|
if recBytes > maxEntryLimit {
|
||||||
return fmt.Errorf("wal: max entry size limit exceeded, recBytes: %d, fileSize(%d) - offset(%d) - padBytes(%d) = entryLimit(%d)",
|
return fmt.Errorf("%w: [wal] max entry size limit exceeded when reading %q, recBytes: %d, fileSize(%d) - offset(%d) - padBytes(%d) = entryLimit(%d)",
|
||||||
recBytes, fileBufReader.FileInfo().Size(), d.lastValidOff, padBytes, maxEntryLimit)
|
io.ErrUnexpectedEOF, fileBufReader.FileInfo().Name(), recBytes, fileBufReader.FileInfo().Size(), d.lastValidOff, padBytes, maxEntryLimit)
|
||||||
}
|
}
|
||||||
|
|
||||||
data := make([]byte, recBytes+padBytes)
|
data := make([]byte, recBytes+padBytes)
|
||||||
|
@ -510,14 +510,14 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|||||||
case nil:
|
case nil:
|
||||||
// We do not have to read out all entries in read mode.
|
// We do not have to read out all entries in read mode.
|
||||||
// The last record maybe a partial written one, so
|
// The last record maybe a partial written one, so
|
||||||
// ErrunexpectedEOF might be returned.
|
// `io.ErrUnexpectedEOF` might be returned.
|
||||||
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
state.Reset()
|
state.Reset()
|
||||||
return nil, state, nil, err
|
return nil, state, nil, err
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
// We must read all of the entries if WAL is opened in write mode.
|
// We must read all the entries if WAL is opened in write mode.
|
||||||
if err != io.EOF {
|
if !errors.Is(err, io.EOF) {
|
||||||
state.Reset()
|
state.Reset()
|
||||||
return nil, state, nil, err
|
return nil, state, nil, err
|
||||||
}
|
}
|
||||||
@ -609,7 +609,7 @@ func ValidSnapshotEntries(lg *zap.Logger, walDir string) ([]walpb.Snapshot, erro
|
|||||||
}
|
}
|
||||||
// We do not have to read out all the WAL entries
|
// We do not have to read out all the WAL entries
|
||||||
// as the decoder is opened in read mode.
|
// as the decoder is opened in read mode.
|
||||||
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -699,7 +699,7 @@ func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) (*raftpb.HardSta
|
|||||||
|
|
||||||
// We do not have to read out all the WAL entries
|
// We do not have to read out all the WAL entries
|
||||||
// as the decoder is opened in read mode.
|
// as the decoder is opened in read mode.
|
||||||
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,6 +30,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
"go.uber.org/zap/zaptest"
|
"go.uber.org/zap/zaptest"
|
||||||
|
|
||||||
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
"go.etcd.io/etcd/client/pkg/v3/fileutil"
|
||||||
@ -1068,3 +1069,79 @@ func TestValidSnapshotEntriesAfterPurgeWal(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLastRecordLengthExceedFileEnd(t *testing.T) {
|
||||||
|
/* The data below was generated by code something like below. The length
|
||||||
|
* of the last record was intentionally changed to 1000 in order to make
|
||||||
|
* sure it exceeds the end of the file.
|
||||||
|
*
|
||||||
|
* for i := 0; i < 3; i++ {
|
||||||
|
* es := []raftpb.Entry{{Index: uint64(i + 1), Data: []byte(fmt.Sprintf("waldata%d", i+1))}}
|
||||||
|
* if err = w.Save(raftpb.HardState{}, es); err != nil {
|
||||||
|
* t.Fatal(err)
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* ......
|
||||||
|
* var sb strings.Builder
|
||||||
|
* for _, ch := range buf {
|
||||||
|
* sb.WriteString(fmt.Sprintf("\\x%02x", ch))
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
// Generate WAL file
|
||||||
|
t.Log("Generate a WAL file with the last record's length modified.")
|
||||||
|
data := []byte("\x04\x00\x00\x00\x00\x00\x00\x84\x08\x04\x10\x00\x00" +
|
||||||
|
"\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x84\x08\x01\x10\x00\x00" +
|
||||||
|
"\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x82\x08\x05\x10\xa0\xb3" +
|
||||||
|
"\x9b\x8f\x08\x1a\x04\x08\x00\x10\x00\x00\x00\x1a\x00\x00\x00\x00" +
|
||||||
|
"\x00\x00\x86\x08\x02\x10\xba\x8b\xdc\x85\x0f\x1a\x10\x08\x00\x10" +
|
||||||
|
"\x00\x18\x01\x22\x08\x77\x61\x6c\x64\x61\x74\x61\x31\x00\x00\x00" +
|
||||||
|
"\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x86\x08\x02\x10\xa1\xe8" +
|
||||||
|
"\xff\x9c\x02\x1a\x10\x08\x00\x10\x00\x18\x02\x22\x08\x77\x61\x6c" +
|
||||||
|
"\x64\x61\x74\x61\x32\x00\x00\x00\x00\x00\x00\xe8\x03\x00\x00\x00" +
|
||||||
|
"\x00\x00\x86\x08\x02\x10\xa1\x9c\xa1\xaa\x04\x1a\x10\x08\x00\x10" +
|
||||||
|
"\x00\x18\x03\x22\x08\x77\x61\x6c\x64\x61\x74\x61\x33\x00\x00\x00" +
|
||||||
|
"\x00\x00\x00")
|
||||||
|
|
||||||
|
buf := bytes.NewBuffer(data)
|
||||||
|
f, err := createFileWithData(t, buf)
|
||||||
|
fileName := f.Name()
|
||||||
|
require.NoError(t, err)
|
||||||
|
t.Logf("fileName: %v", fileName)
|
||||||
|
|
||||||
|
// Verify low-level decoder directly
|
||||||
|
t.Log("Verify all records can be parsed correctly.")
|
||||||
|
rec := &walpb.Record{}
|
||||||
|
decoder := NewDecoder(fileutil.NewFileReader(f))
|
||||||
|
for {
|
||||||
|
if err = decoder.Decode(rec); err != nil {
|
||||||
|
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if rec.Type == EntryType {
|
||||||
|
e := MustUnmarshalEntry(rec.Data)
|
||||||
|
t.Logf("Validating normal entry: %v", e)
|
||||||
|
recData := fmt.Sprintf("waldata%d", e.Index)
|
||||||
|
require.Equal(t, raftpb.EntryNormal, e.Type)
|
||||||
|
require.Equal(t, recData, string(e.Data))
|
||||||
|
}
|
||||||
|
rec = &walpb.Record{}
|
||||||
|
}
|
||||||
|
require.NoError(t, f.Close())
|
||||||
|
|
||||||
|
// Verify w.ReadAll() returns io.ErrUnexpectedEOF in the error chain.
|
||||||
|
t.Log("Verify the w.ReadAll returns io.ErrUnexpectedEOF in the error chain")
|
||||||
|
newFileName := filepath.Join(filepath.Dir(fileName), "0000000000000000-0000000000000000.wal")
|
||||||
|
require.NoError(t, os.Rename(fileName, newFileName))
|
||||||
|
|
||||||
|
w, err := Open(zaptest.NewLogger(t), filepath.Dir(fileName), walpb.Snapshot{
|
||||||
|
Index: 0,
|
||||||
|
Term: 0,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer w.Close()
|
||||||
|
|
||||||
|
_, _, _, err = w.ReadAll()
|
||||||
|
// Note: The wal file will be repaired automatically in production
|
||||||
|
// environment, but only once.
|
||||||
|
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
|
||||||
|
}
|
||||||
|
@ -69,6 +69,9 @@ func readRaw(fromIndex *uint64, waldir string, out io.Writer) {
|
|||||||
if errors.Is(err, io.EOF) {
|
if errors.Is(err, io.EOF) {
|
||||||
fmt.Fprintf(out, "EOF: All entries were processed.\n")
|
fmt.Fprintf(out, "EOF: All entries were processed.\n")
|
||||||
break
|
break
|
||||||
|
} else if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
|
fmt.Fprintf(out, "ErrUnexpectedEOF: The last record might be corrupted, error: %v.\n", err)
|
||||||
|
break
|
||||||
} else {
|
} else {
|
||||||
log.Printf("Error: Reading failed: %v", err)
|
log.Printf("Error: Reading failed: %v", err)
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user