mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #5368 from heyitsanthony/sshot-hash
v3rpc, etcdctl: snapshot integrity hash
This commit is contained in:
commit
f6e5fe6877
@ -18,6 +18,8 @@ $ etcdctl --endpoints $ENDPOINT snapshot save snapshot.db
|
||||
|
||||
To restore a cluster, all that is needed is a single snapshot "db" file. A cluster restore with `etcdctl snapshot restore` creates new etcd data directories; all members should restore using the same snapshot. Restoring overwrites some snapshot metadata (specifically, the member ID and cluster ID); the member loses its former identity. This metadata overwrite prevents the new member from inadvertently joining an existing cluster. Therefore, in order to start a cluster from a snapshot, the restore must start a new logical cluster.
|
||||
|
||||
Snapshot integrity may optionally be verified at restore time. If the snapshot is taken with `etcdctl snapshot save`, it will have an integrity hash that is checked by `etcdctl snapshot restore`. If the snapshot is copied from the data directory, there is no integrity hash and it can only be restored by passing `--skip-hash-check`.
|
||||
|
||||
A restore initializes a new member of a new cluster, with a fresh cluster configuration using `etcd`'s cluster configuration flags, but preserves the contents of the etcd keyspace. Continuing from the previous example, the following creates new etcd data directories (`m1.etcd`, `m2.etcd`, `m3.etcd`) for a three member cluster:
|
||||
|
||||
```sh
|
||||
|
@ -52,6 +52,38 @@ func snapshotTest(cx ctlCtx) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestCtlV3SnapshotCorrupt runs snapshotCorruptTest through the shared testCtl
// harness.
func TestCtlV3SnapshotCorrupt(t *testing.T) {
	testCtl(t, snapshotCorruptTest)
}
|
||||
|
||||
func snapshotCorruptTest(cx ctlCtx) {
|
||||
fpath := "test.snapshot"
|
||||
defer os.RemoveAll(fpath)
|
||||
|
||||
if err := ctlV3SnapshotSave(cx, fpath); err != nil {
|
||||
cx.t.Fatalf("snapshotTest ctlV3SnapshotSave error (%v)", err)
|
||||
}
|
||||
|
||||
// corrupt file
|
||||
f, oerr := os.OpenFile(fpath, os.O_WRONLY, 0)
|
||||
if oerr != nil {
|
||||
cx.t.Fatal(oerr)
|
||||
}
|
||||
if _, err := f.Write(make([]byte, 512)); err != nil {
|
||||
cx.t.Fatal(err)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
defer os.RemoveAll("snap.etcd")
|
||||
serr := spawnWithExpect(
|
||||
append(cx.PrefixArgs(), "snapshot", "restore",
|
||||
"--data-dir", "snap.etcd",
|
||||
fpath),
|
||||
"expected sha256")
|
||||
|
||||
if serr != nil {
|
||||
cx.t.Fatal(serr)
|
||||
}
|
||||
}
|
||||
|
||||
func ctlV3SnapshotSave(cx ctlCtx, fpath string) error {
|
||||
cmdArgs := append(cx.PrefixArgs(), "snapshot", "save", fpath)
|
||||
return spawnWithExpect(cmdArgs, fmt.Sprintf("Snapshot saved at %s", fpath))
|
||||
|
@ -15,6 +15,7 @@
|
||||
package command
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
@ -22,6 +23,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
@ -50,6 +52,7 @@ var (
|
||||
restoreDataDir string
|
||||
restorePeerURLs string
|
||||
restoreName string
|
||||
skipHashCheck bool
|
||||
)
|
||||
|
||||
// NewSnapshotCommand returns the cobra command for "snapshot".
|
||||
@ -94,6 +97,7 @@ func NewSnapshotRestoreCommand() *cobra.Command {
|
||||
cmd.Flags().StringVar(&restoreClusterToken, "initial-cluster-token", "etcd-cluster", "Initial cluster token for the etcd cluster during restore bootstrap.")
|
||||
cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster.")
|
||||
cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member.")
|
||||
cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory).")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@ -191,7 +195,7 @@ func initialClusterFromName(name string) string {
|
||||
if name == "" {
|
||||
n = defaultName
|
||||
}
|
||||
return fmt.Sprintf("%s=http://localhost:2380", n, n)
|
||||
return fmt.Sprintf("%s=http://localhost:2380", n)
|
||||
}
|
||||
|
||||
// makeWAL creates a WAL for the initial cluster
|
||||
@ -261,18 +265,65 @@ func makeDB(snapdir, dbfile string) {
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// get snapshot integrity hash
|
||||
if _, err := f.Seek(-sha256.Size, os.SEEK_END); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
sha := make([]byte, sha256.Size)
|
||||
if _, err := f.Read(sha); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
if _, err := f.Seek(0, os.SEEK_SET); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(snapdir, 0755); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
|
||||
dbpath := path.Join(snapdir, "db")
|
||||
db, dberr := os.OpenFile(dbpath, os.O_WRONLY|os.O_CREATE, 0600)
|
||||
db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
|
||||
if dberr != nil {
|
||||
ExitWithError(ExitIO, dberr)
|
||||
}
|
||||
if _, err := io.Copy(db, f); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
|
||||
// truncate away integrity hash, if any.
|
||||
off, serr := db.Seek(0, os.SEEK_END)
|
||||
if serr != nil {
|
||||
ExitWithError(ExitIO, serr)
|
||||
}
|
||||
hasHash := (off % 512) == sha256.Size
|
||||
if hasHash {
|
||||
if err := db.Truncate(off - sha256.Size); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
}
|
||||
|
||||
if !hasHash && !skipHashCheck {
|
||||
err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
|
||||
ExitWithError(ExitBadArgs, err)
|
||||
}
|
||||
|
||||
if hasHash && !skipHashCheck {
|
||||
// check for match
|
||||
if _, err := db.Seek(0, os.SEEK_SET); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
h := sha256.New()
|
||||
if _, err := io.Copy(h, db); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
dbsha := h.Sum(nil)
|
||||
if !reflect.DeepEqual(sha, dbsha) {
|
||||
err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
}
|
||||
}
|
||||
|
||||
// db hash is OK, can now modify DB so it can be part of a new cluster
|
||||
db.Close()
|
||||
|
||||
// update consistentIndex so applies go through on etcdserver despite
|
||||
@ -285,6 +336,7 @@ func makeDB(snapdir, dbfile string) {
|
||||
_, _, err := s.TxnDeleteRange(id, k, nil)
|
||||
return err
|
||||
}
|
||||
|
||||
// delete stored members from old cluster since using new members
|
||||
btx.UnsafeForEach([]byte("members"), del)
|
||||
btx.UnsafeForEach([]byte("members_removed"), del)
|
||||
|
@ -15,6 +15,7 @@
|
||||
package v3rpc
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"io"
|
||||
|
||||
"github.com/coreos/etcd/etcdserver"
|
||||
@ -81,6 +82,8 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance
|
||||
pw.Close()
|
||||
}()
|
||||
|
||||
// send file data
|
||||
h := sha256.New()
|
||||
br := int64(0)
|
||||
buf := make([]byte, 32*1024)
|
||||
sz := snap.Size()
|
||||
@ -97,6 +100,14 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance
|
||||
if err = srv.Send(resp); err != nil {
|
||||
return togRPCError(err)
|
||||
}
|
||||
h.Write(buf[:n])
|
||||
}
|
||||
|
||||
// send sha
|
||||
sha := h.Sum(nil)
|
||||
hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha}
|
||||
if err := srv.Send(hresp); err != nil {
|
||||
return togRPCError(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
Loading…
x
Reference in New Issue
Block a user