From f7f7e9c7623a8f2c115126e79be5cee5b0732e83 Mon Sep 17 00:00:00 2001 From: jcoutin Date: Thu, 2 May 2019 10:35:07 +0100 Subject: [PATCH] wal: Improve cleanup for robustness and debuggability Rename the WAL directory with a '.broken.<timestamp>' suffix instead of deleting it, and call cleanup from a single deferred function whenever perr is non-nil. --- wal/wal.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/wal/wal.go b/wal/wal.go index 966a24057..a86a48800 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -184,6 +184,13 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { return nil, err } + var perr error + defer func() { + if perr != nil { + w.cleanupWAL(lg) + } + }() + // directory was renamed; sync parent dir to persist rename pdir, perr := fileutil.OpenDir(filepath.Dir(w.dir)) if perr != nil { @@ -195,7 +202,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } if perr = fileutil.Fsync(pdir); perr != nil { @@ -207,7 +213,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } if perr = pdir.Close(); perr != nil { @@ -219,7 +224,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { zap.Error(perr), ) } - w.cleanupWAL(lg) return nil, perr } @@ -230,16 +234,22 @@ func (w *WAL) cleanupWAL(lg *zap.Logger) { var err error if err = w.Close(); err != nil { if lg != nil { - lg.Panic("failed to cleanup WAL", zap.Error(err)) + lg.Panic("failed to closeup WAL during cleanup", zap.Error(err)) } else { - plog.Panicf("failed to cleanup WAL: %v", err) + plog.Panicf("failed to closeup WAL during cleanup: %v", err) } } - if err = os.RemoveAll(w.dir); err != nil { + brokenDirName := fmt.Sprintf("%s.broken.%v", w.dir, time.Now().Format("20060102.150405.999999")) + if err = os.Rename(w.dir, brokenDirName); err != nil { if lg != nil { - lg.Panic("failed to cleanup WAL", 
zap.Error(err)) + lg.Panic( + "failed to rename WAL during cleanup", + zap.Error(err), + zap.String("source-path", w.dir), + zap.String("rename-path", brokenDirName), + ) } else { - plog.Panicf("failed to cleanup WAL: %v", err) + plog.Panicf("failed to rename WAL during cleanup: %v", err) } } }