From e31510975a855310f3ae57de22cd734881e02f29 Mon Sep 17 00:00:00 2001 From: Joe Betz Date: Mon, 4 Jun 2018 21:52:26 -0700 Subject: [PATCH] etcdserver: Backport snapshot recovery from #7917 to 3.1 branch --- etcdserver/backend.go | 83 +++++++++++++++++++++++++++++++++++++++++++ etcdserver/server.go | 4 +++ 2 files changed, 87 insertions(+) create mode 100644 etcdserver/backend.go diff --git a/etcdserver/backend.go b/etcdserver/backend.go new file mode 100644 index 000000000..87be548e5 --- /dev/null +++ b/etcdserver/backend.go @@ -0,0 +1,83 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdserver + +import ( + "fmt" + "os" + "path/filepath" + "time" + + "github.com/coreos/etcd/lease" + "github.com/coreos/etcd/mvcc" + "github.com/coreos/etcd/mvcc/backend" + "github.com/coreos/etcd/raft/raftpb" + "github.com/coreos/etcd/snap" +) + +func newBackend(cfg *ServerConfig) backend.Backend { + return backend.NewDefaultBackend(backendPath(cfg)) +} + +func backendPath(cfg *ServerConfig) string { + return filepath.Join(cfg.SnapDir(), "db") +} + +// openSnapshotBackend renames the snapshot db for snapshot.Metadata.Index to the current etcd db path and opens it; it returns an error if that file cannot be found or renamed. 
+func openSnapshotBackend(cfg *ServerConfig, ss *snap.Snapshotter, snapshot raftpb.Snapshot) (backend.Backend, error) { + snapPath, err := ss.DBFilePath(snapshot.Metadata.Index) + if err != nil { + return nil, fmt.Errorf("failed to find database snapshot file (%v)", err) + } + if err := os.Rename(snapPath, backendPath(cfg)); err != nil { + return nil, fmt.Errorf("failed to rename database snapshot file (%v)", err) + } + return openBackend(cfg), nil +} + +// openBackend returns a backend using the current etcd db. If the backend is not opened within 10 seconds it logs a warning and keeps waiting until it is. +func openBackend(cfg *ServerConfig) backend.Backend { + fn := backendPath(cfg) + beOpened := make(chan backend.Backend) + go func() { + beOpened <- newBackend(cfg) + }() + + select { + case be := <-beOpened: + return be + + case <-time.After(10 * time.Second): + plog.Warningf("another etcd process is using %q and holds the file lock, or loading backend file is taking >10 seconds", fn) + plog.Warningf("waiting for it to exit before starting...") + } + + return <-beOpened +} + +// recoverSnapshotBackend recovers the DB from a snapshot in case etcd crashes +// before updating the backend db after persisting raft snapshot to disk, +// violating the invariant snapshot.Metadata.Index <= db.consistentIndex. In this +// case, replace the db with the snapshot db sent by the leader. 
+func recoverSnapshotBackend(cfg *ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) { + var cIndex consistentIndex + kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex) + defer kv.Close() + if snapshot.Metadata.Index <= kv.ConsistentIndex() { + return oldbe, nil + } + oldbe.Close() + return openSnapshotBackend(cfg, snap.New(cfg.SnapDir()), snapshot) +} diff --git a/etcdserver/server.go b/etcdserver/server.go index 329b4a10f..f54e15071 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -378,6 +378,10 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) { plog.Panicf("recovered store from snapshot error: %v", err) } plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index) + + if be, err = recoverSnapshotBackend(cfg, be, *snapshot); err != nil { + plog.Panicf("recovering backend from snapshot error: %v", err) + } } cfg.Print() if !cfg.ForceNewCluster {