From 51035bfd8469c6ccf5289086bfac138906684581 Mon Sep 17 00:00:00 2001 From: Joshua Coutinho Date: Sun, 28 Apr 2019 16:56:44 +0000 Subject: [PATCH 1/3] wal: cleanup wal directory if creation fails delete /member/wal if any operation after the rename in wal.Create fails to avoid reading an inconsistent WAL on restart. Fixes #10688 --- wal/wal.go | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/wal/wal.go b/wal/wal.go index 4ef11b9f6..966a24057 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -195,6 +195,7 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } + w.cleanupWAL(lg) return nil, perr } if perr = fileutil.Fsync(pdir); perr != nil { @@ -206,9 +207,10 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } + w.cleanupWAL(lg) return nil, perr } - if perr = pdir.Close(); err != nil { + if perr = pdir.Close(); perr != nil { if lg != nil { lg.Warn( "failed to close the parent data directory file", @@ -217,12 +219,31 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } + w.cleanupWAL(lg) return nil, perr } return w, nil } +func (w *WAL) cleanupWAL(lg *zap.Logger) { + var err error + if err = w.Close(); err != nil { + if lg != nil { + lg.Panic("failed to cleanup WAL", zap.Error(err)) + } else { + plog.Panicf("failed to cleanup WAL: %v", err) + } + } + if err = os.RemoveAll(w.dir); err != nil { + if lg != nil { + lg.Panic("failed to cleanup WAL", zap.Error(err)) + } else { + plog.Panicf("failed to cleanup WAL: %v", err) + } + } +} + func (w *WAL) renameWAL(tmpdirpath string) (*WAL, error) { if err := os.RemoveAll(w.dir); err != nil { return nil, err From f7f7e9c7623a8f2c115126e79be5cee5b0732e83 Mon Sep 17 00:00:00 2001 From: jcoutin Date: Thu, 2 May 2019 10:35:07 +0100 Subject: [PATCH 2/3] wal: Improve cleanup for robustness and debuggability Rename wal with '.suffix.' instead of delete it and call cleanup when perr in a 'defer'ed statement. --- wal/wal.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/wal/wal.go b/wal/wal.go index 966a24057..a86a48800 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -184,6 +184,13 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { return nil, err } + var perr error + defer func() { + if perr != nil { + w.cleanupWAL(lg) + } + }() + // directory was renamed; sync parent dir to persist rename pdir, perr := fileutil.OpenDir(filepath.Dir(w.dir)) if perr != nil { @@ -195,7 +202,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } if perr = fileutil.Fsync(pdir); perr != nil { @@ -207,7 +213,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } if perr = pdir.Close(); perr != nil { @@ -219,7 +224,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } @@ -230,16 +234,22 @@ func (w *WAL) cleanupWAL(lg *zap.Logger) { var err error if err = w.Close(); err != nil { if lg != nil { - lg.Panic("failed to cleanup WAL", zap.Error(err)) + lg.Panic("failed to closeup WAL during cleanup", zap.Error(err)) } else { - plog.Panicf("failed to cleanup WAL: %v", err) + plog.Panicf("failed to closeup WAL during cleanup: %v", err) } } - if err = os.RemoveAll(w.dir); err != nil { + brokenDirName := fmt.Sprintf("%s.broken.%v", w.dir, time.Now().Format("20060102.150405.999999")) + if err = os.Rename(w.dir, brokenDirName); err != nil { if lg != nil { - lg.Panic("failed to cleanup WAL", zap.Error(err)) + lg.Panic( + "failed to rename WAL during cleanup", + zap.Error(err), + zap.String("source-path", w.dir), + zap.String("rename-path", brokenDirName), + ) } else { - plog.Panicf("failed to cleanup WAL: %v", err) + plog.Panicf("failed to rename WAL during cleanup: %v", err) } } } From a0c889d14b6f59f65afe031fb4a8886a0c372d60 Mon Sep 17 00:00:00 2001 From: Joshua Coutinho Date: Fri, 10 May 2019 20:37:08 +0100 Subject: [PATCH 3/3] wal: add a test for wal cleanup, improve comments To add test coverage of wal cleanup. --- wal/wal.go | 4 ++-- wal/wal_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/wal/wal.go b/wal/wal.go index a86a48800..c01e36964 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -234,9 +234,9 @@ func (w *WAL) cleanupWAL(lg *zap.Logger) { var err error if err = w.Close(); err != nil { if lg != nil { - lg.Panic("failed to closeup WAL during cleanup", zap.Error(err)) + lg.Panic("failed to close WAL during cleanup", zap.Error(err)) } else { - plog.Panicf("failed to closeup WAL during cleanup: %v", err) + plog.Panicf("failed to close WAL during cleanup: %v", err) } } brokenDirName := fmt.Sprintf("%s.broken.%v", w.dir, time.Now().Format("20060102.150405.999999")) diff --git a/wal/wal_test.go b/wal/wal_test.go index 7ada82d4d..6e79a03fe 100644 --- a/wal/wal_test.go +++ b/wal/wal_test.go @@ -16,6 +16,7 @@ package wal import ( "bytes" + "fmt" "io" "io/ioutil" "math" @@ -23,6 +24,7 @@ import ( "path" "path/filepath" "reflect" + "regexp" "testing" "go.etcd.io/etcd/v3/pkg/fileutil" @@ -101,6 +103,37 @@ func TestCreateFailFromPollutedDir(t *testing.T) { } } +func TestWalCleanup(t *testing.T) { + testRoot, err := ioutil.TempDir(os.TempDir(), "waltestroot") + if err != nil { + t.Fatal(err) + } + p, err := ioutil.TempDir(testRoot, "waltest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(testRoot) + + logger := zap.NewExample() + w, err := Create(logger, p, []byte("")) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + w.cleanupWAL(logger) + fnames, err := fileutil.ReadDir(testRoot) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if len(fnames) != 1 { + t.Fatalf("expected 1 file under %v, got %v", testRoot, len(fnames)) + } + pattern := fmt.Sprintf(`%s.broken\.[\d]{8}\.[\d]{6}\.[\d]{1,6}?`, filepath.Base(p)) + match, _ := regexp.MatchString(pattern, fnames[0]) + if !match { + t.Errorf("match = false, expected true for %v with pattern %v", fnames[0], pattern) + } +} + func TestCreateFailFromNoSpaceLeft(t *testing.T) { p, err := ioutil.TempDir(os.TempDir(), "waltest") if err != nil {