From d0af96d5583074799e4c9c7dcd9af62a5bc58e4a Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 29 Jan 2015 11:35:55 -0800 Subject: [PATCH 1/3] etcdctl/backup_command: save snapshot mark in new wal --- etcdctl/command/backup_command.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etcdctl/command/backup_command.go b/etcdctl/command/backup_command.go index 5e574c3d9..82d7bda33 100644 --- a/etcdctl/command/backup_command.go +++ b/etcdctl/command/backup_command.go @@ -88,4 +88,7 @@ func handleBackup(c *cli.Context) { if err := neww.Save(state, ents); err != nil { log.Fatal(err) } + if err := neww.SaveSnapshot(walsnap); err != nil { + log.Fatal(err) + } } From 7840d49ae0d420c622a74e1b86e4d663f08b0d02 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 29 Jan 2015 11:36:26 -0800 Subject: [PATCH 2/3] etcdserver: not add self to transporter based on local ID If this is decided by local name, it causes trouble when the name is duplicated in the cluster. --- etcdserver/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etcdserver/server.go b/etcdserver/server.go index 264b28b44..856f5138c 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -253,7 +253,7 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) { tr := rafthttp.NewTransporter(cfg.Transport, id, cfg.Cluster.ID(), srv, srv.errorc, sstats, lstats) // add all the remote members into sendhub for _, m := range cfg.Cluster.Members() { - if m.Name != cfg.Name { + if m.ID != id { tr.AddPeer(m.ID, m.PeerURLs) } } From e966e565c48c85fb9e9fcdc1ad9bce9138c95bff Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 29 Jan 2015 11:38:52 -0800 Subject: [PATCH 3/3] etcdctl/backup_command: handle datadir with missed snapshot mark This helps to recover from the data dir created in v2.0.0-rc1. 
--- etcdctl/command/backup_command.go | 8 +++++++- wal/wal.go | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/etcdctl/command/backup_command.go b/etcdctl/command/backup_command.go index 82d7bda33..ed2488e40 100644 --- a/etcdctl/command/backup_command.go +++ b/etcdctl/command/backup_command.go @@ -15,6 +15,7 @@ package command import ( + "fmt" "log" "os" "path" @@ -71,7 +72,12 @@ func handleBackup(c *cli.Context) { } defer w.Close() wmetadata, state, ents, err := w.ReadAll() - if err != nil { + switch err { + case nil: + case wal.ErrSnapshotNotFound: + fmt.Printf("Failed to find the match snapshot record %+v in wal %v.", walsnap, srcWAL) + fmt.Printf("etcdctl will add it back. Start auto fixing...") + default: log.Fatal(err) } var metadata etcdserverpb.Metadata diff --git a/wal/wal.go b/wal/wal.go index 61fde9349..93aff5f6c 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -203,7 +203,7 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, all bool) (*WAL, error) { // ReadAll reads out all records of the current WAL. // If it cannot read out the expected snap, it will return ErrSnapshotNotFound. // If loaded snap doesn't match with the expected one, it will return -// ErrSnapshotMismatch. +// all the records and error ErrSnapshotMismatch. // TODO: detect not-last-snap error. // TODO: maybe loose the checking of match. // After ReadAll, the WAL will be ready for appending new records. @@ -256,9 +256,9 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb. state.Reset() return nil, state, nil, err } + err = nil if !match { - state.Reset() - return nil, state, nil, ErrSnapshotNotFound + err = ErrSnapshotNotFound } // close decoder, disable reading @@ -269,7 +269,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb. 
// create encoder (chain crc with the decoder), enable appending w.encoder = newEncoder(w.f, w.decoder.lastCRC()) w.decoder = nil - return metadata, state, ents, nil + return metadata, state, ents, err } // Cut closes current file written and creates a new one ready to append.