From 5089bf58fb3c49652c40bcfc4ba191502a2250cc Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 23 Aug 2016 14:48:07 -0700 Subject: [PATCH] wal: hold file lock while renaming WAL directory on non-Windows Windows requires this lock to be released before the directory is renamed. But on unix-like operating systems, releasing the lock and trying to reacquire it immediately can be flaky if a process is forked around the same time. The file descriptors are marked as close-on-exec by the Go runtime, but there is a window between the fork and exec where another process will be holding the lock. --- wal/wal.go | 17 +---------------- wal/wal_test.go | 2 +- wal/wal_unix.go | 38 ++++++++++++++++++++++++++++++++++++++ wal/wal_windows.go | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 wal/wal_unix.go create mode 100644 wal/wal_windows.go diff --git a/wal/wal.go b/wal/wal.go index 8ed61b54a..ff720880e 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -129,22 +129,7 @@ func Create(dirpath string, metadata []byte) (*WAL, error) { return nil, err } - // rename of directory with locked files doesn't work on windows; close - // the WAL to release the locks so the directory can be renamed - w.Close() - if err := os.Rename(tmpdirpath, dirpath); err != nil { - return nil, err - } - // reopen and relock - newWAL, oerr := Open(dirpath, walpb.Snapshot{}) - if oerr != nil { - return nil, oerr - } - if _, _, _, err := newWAL.ReadAll(); err != nil { - newWAL.Close() - return nil, err - } - return newWAL, nil + return w.renameWal(tmpdirpath) } // Open opens the WAL at the given snap. diff --git a/wal/wal_test.go b/wal/wal_test.go index 5a688917c..7dc19f8af 100644 --- a/wal/wal_test.go +++ b/wal/wal_test.go @@ -666,7 +666,7 @@ func TestOpenOnTornWrite(t *testing.T) { } } - fn := w.tail().Name() + fn := path.Join(p, path.Base(w.tail().Name())) w.Close() // clobber some entry with 0's to simulate a torn write diff --git a/wal/wal_unix.go b/wal/wal_unix.go new file mode 100644 index 000000000..101ea6acc --- /dev/null +++ b/wal/wal_unix.go @@ -0,0 +1,38 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows + +package wal + +import "os" + +func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) { + // On non-Windows platforms, hold the lock while renaming. Releasing + // the lock and trying to reacquire it quickly can be flaky because + // it's possible the process will fork to spawn a process while this is + // happening. The fds are set up as close-on-exec by the Go runtime, + // but there is a window between the fork and the exec where another + // process holds the lock. + + if err := os.RemoveAll(w.dir); err != nil { + return nil, err + } + if err := os.Rename(tmpdirpath, w.dir); err != nil { + return nil, err + } + + w.fp = newFilePipeline(w.dir, SegmentSizeBytes) + return w, nil +} diff --git a/wal/wal_windows.go b/wal/wal_windows.go new file mode 100644 index 000000000..0b9e434cf --- /dev/null +++ b/wal/wal_windows.go @@ -0,0 +1,41 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "os" + + "github.com/coreos/etcd/wal/walpb" +) + +func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) { + // rename of directory with locked files doesn't work on + // windows; close the WAL to release the locks so the directory + // can be renamed + w.Close() + if err := os.Rename(tmpdirpath, w.dir); err != nil { + return nil, err + } + // reopen and relock + newWAL, oerr := Open(w.dir, walpb.Snapshot{}) + if oerr != nil { + return nil, oerr + } + if _, _, _, err := newWAL.ReadAll(); err != nil { + newWAL.Close() + return nil, err + } + return newWAL, nil +}