restrict the max size of each WAL entry to the remaining size of the file

Currently the max size of each WAL entry is hard-coded to 10MB. If users
set a value > 10MB for the flag --max-request-bytes, then etcd may run
into a situation where it successfully processes a big request, but fails
to decode it when replaying the WAL file on startup.

On the other hand, we can't just remove the limit, because if a
WAL entry is somehow corrupted and its recBytes is a huge value, then
etcd may run out of memory. So the solution is to restrict the max size
of each WAL entry to a dynamic value, which is the remaining size of
the WAL file.

Signed-off-by: Benjamin Wang <wachao@vmware.com>
This commit is contained in:
Benjamin Wang
2022-06-17 09:01:29 +08:00
parent db0b67e8a0
commit 621cd7b9e5
5 changed files with 125 additions and 76 deletions

View File

@@ -15,12 +15,13 @@
package wal
import (
"bufio"
"encoding/binary"
"fmt"
"hash"
"io"
"sync"
"go.etcd.io/etcd/client/pkg/v3/fileutil"
"go.etcd.io/etcd/pkg/v3/crc"
"go.etcd.io/etcd/pkg/v3/pbutil"
"go.etcd.io/etcd/raft/v3/raftpb"
@@ -34,17 +35,17 @@ const frameSizeBytes = 8
type decoder struct {
mu sync.Mutex
brs []*bufio.Reader
brs []*fileutil.FileBufReader
// lastValidOff file offset following the last valid decoded record
lastValidOff int64
crc hash.Hash32
}
func newDecoder(r ...io.Reader) *decoder {
readers := make([]*bufio.Reader, len(r))
func newDecoder(r ...fileutil.FileReader) *decoder {
readers := make([]*fileutil.FileBufReader, len(r))
for i := range r {
readers[i] = bufio.NewReader(r[i])
readers[i] = fileutil.NewFileBufReader(r[i])
}
return &decoder{
brs: readers,
@@ -59,17 +60,13 @@ func (d *decoder) decode(rec *walpb.Record) error {
return d.decodeRecord(rec)
}
// raft max message size is set to 1 MB in etcd server
// assume projects set reasonable message size limit,
// thus entry size should never exceed 10 MB
const maxWALEntrySizeLimit = int64(10 * 1024 * 1024)
func (d *decoder) decodeRecord(rec *walpb.Record) error {
if len(d.brs) == 0 {
return io.EOF
}
l, err := readInt64(d.brs[0])
fileBufReader := d.brs[0]
l, err := readInt64(fileBufReader)
if err == io.EOF || (err == nil && l == 0) {
// hit end of file or preallocated space
d.brs = d.brs[1:]
@@ -84,12 +81,15 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
}
recBytes, padBytes := decodeFrameSize(l)
if recBytes >= maxWALEntrySizeLimit-padBytes {
return ErrMaxWALEntrySizeLimitExceeded
// The length of current WAL entry must be less than the remaining file size.
maxEntryLimit := fileBufReader.FileInfo().Size() - d.lastValidOff - padBytes
if recBytes > maxEntryLimit {
return fmt.Errorf("wal: max entry size limit exceeded, recBytes: %d, fileSize(%d) - offset(%d) - padBytes(%d) = entryLimit(%d)",
recBytes, fileBufReader.FileInfo().Size(), d.lastValidOff, padBytes, maxEntryLimit)
}
data := make([]byte, recBytes+padBytes)
if _, err = io.ReadFull(d.brs[0], data); err != nil {
if _, err = io.ReadFull(fileBufReader, data); err != nil {
// ReadFull returns io.EOF only if no bytes were read
// the decoder should treat this as an ErrUnexpectedEOF instead.
if err == io.EOF {