restrict the max size of each WAL entry to the remaining size of the file

Currently the max size of each WAL entry is hard coded as 10MB. If users
set a value > 10MB for the flag --max-request-bytes, then etcd may run
into a situation that it successfully processes a big request, but fails
to decode it when replaying the WAL file on startup.

On the other hand, we can't just remove the limitation, because if a
WAL entry is somehow corrupted, and its recByte is a huge value, then
etcd may run out of memory. So the solution is to restrict the max size
of each WAL entry as a dynamic value, which is the remaining size of
the WAL file.

Signed-off-by: Benjamin Wang <wachao@vmware.com>
This commit is contained in:
Benjamin Wang
2022-06-17 09:01:29 +08:00
parent db0b67e8a0
commit 621cd7b9e5
5 changed files with 125 additions and 76 deletions

View File

@@ -20,6 +20,7 @@ import (
"io"
"io/ioutil"
"math"
"math/rand"
"os"
"path"
"path/filepath"
@@ -335,7 +336,7 @@ func TestCut(t *testing.T) {
}
defer f.Close()
nw := &WAL{
decoder: newDecoder(f),
decoder: newDecoder(fileutil.NewFileReader(f)),
start: snap,
}
_, gst, _, err := nw.ReadAll()
@@ -411,51 +412,76 @@ func TestSaveWithCut(t *testing.T) {
}
func TestRecover(t *testing.T) {
p, err := ioutil.TempDir(t.TempDir(), "waltest")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(p)
w, err := Create(zap.NewExample(), p, []byte("metadata"))
if err != nil {
t.Fatal(err)
}
if err = w.SaveSnapshot(walpb.Snapshot{}); err != nil {
t.Fatal(err)
}
ents := []raftpb.Entry{{Index: 1, Term: 1, Data: []byte{1}}, {Index: 2, Term: 2, Data: []byte{2}}}
if err = w.Save(raftpb.HardState{}, ents); err != nil {
t.Fatal(err)
}
sts := []raftpb.HardState{{Term: 1, Vote: 1, Commit: 1}, {Term: 2, Vote: 2, Commit: 2}}
for _, s := range sts {
if err = w.Save(s, nil); err != nil {
t.Fatal(err)
}
}
w.Close()
if w, err = Open(zap.NewExample(), p, walpb.Snapshot{}); err != nil {
t.Fatal(err)
}
metadata, state, entries, err := w.ReadAll()
if err != nil {
t.Fatal(err)
cases := []struct {
name string
size int
}{
{
name: "10MB",
size: 10 * 1024 * 1024,
},
{
name: "20MB",
size: 20 * 1024 * 1024,
},
{
name: "40MB",
size: 40 * 1024 * 1024,
},
}
if !bytes.Equal(metadata, []byte("metadata")) {
t.Errorf("metadata = %s, want %s", metadata, "metadata")
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
p := t.TempDir()
w, err := Create(zap.NewExample(), p, []byte("metadata"))
if err != nil {
t.Fatal(err)
}
if err = w.SaveSnapshot(walpb.Snapshot{}); err != nil {
t.Fatal(err)
}
data := make([]byte, tc.size)
n, err := rand.Read(data)
assert.Equal(t, tc.size, n)
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
ents := []raftpb.Entry{{Index: 1, Term: 1, Data: data}, {Index: 2, Term: 2, Data: data}}
if err = w.Save(raftpb.HardState{}, ents); err != nil {
t.Fatal(err)
}
sts := []raftpb.HardState{{Term: 1, Vote: 1, Commit: 1}, {Term: 2, Vote: 2, Commit: 2}}
for _, s := range sts {
if err = w.Save(s, nil); err != nil {
t.Fatal(err)
}
}
w.Close()
if w, err = Open(zap.NewExample(), p, walpb.Snapshot{}); err != nil {
t.Fatal(err)
}
metadata, state, entries, err := w.ReadAll()
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(metadata, []byte("metadata")) {
t.Errorf("metadata = %s, want %s", metadata, "metadata")
}
if !reflect.DeepEqual(entries, ents) {
t.Errorf("ents = %+v, want %+v", entries, ents)
}
// only the latest state is recorded
s := sts[len(sts)-1]
if !reflect.DeepEqual(state, s) {
t.Errorf("state = %+v, want %+v", state, s)
}
w.Close()
})
}
if !reflect.DeepEqual(entries, ents) {
t.Errorf("ents = %+v, want %+v", entries, ents)
}
// only the latest state is recorded
s := sts[len(sts)-1]
if !reflect.DeepEqual(state, s) {
t.Errorf("state = %+v, want %+v", state, s)
}
w.Close()
}
func TestSearchIndex(t *testing.T) {