From 73204e9637a529d0c70110ec5be925f02b45a6f9 Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Tue, 17 May 2016 14:16:29 -0700 Subject: [PATCH] etcdserver: wait for snapshots before closing raft Fixes #5374 --- etcdserver/server.go | 5 +++-- integration/cluster_test.go | 11 ++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/etcdserver/server.go b/etcdserver/server.go index 1e04a322d..7dff81697 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -542,12 +542,13 @@ func (s *EtcdServer) run() { defer func() { sched.Stop() + // wait for snapshots before closing raft so wal stays open + s.wg.Wait() + // must stop raft after scheduler-- etcdserver can leak rafthttp pipelines // by adding a peer after raft stops the transport s.r.stop() - s.wg.Wait() - // kv, lessor and backend can be nil if running without v3 enabled // or running unit tests. if s.lessor != nil { diff --git a/integration/cluster_test.go b/integration/cluster_test.go index 7a377a0b4..e11ffe454 100644 --- a/integration/cluster_test.go +++ b/integration/cluster_test.go @@ -232,9 +232,14 @@ func TestIssue2681(t *testing.T) { } // Ensure we can remove a member after a snapshot then add a new one back. -func TestIssue2746(t *testing.T) { +func TestIssue2746(t *testing.T) { testIssue2746(t, 5) } + +// With 3 nodes TestIssue2746 sometimes had a shutdown with an inflight snapshot. +func TestIssue2746WithThree(t *testing.T) { testIssue2746(t, 3) } + +func testIssue2746(t *testing.T, members int) { defer testutil.AfterTest(t) - c := NewCluster(t, 5) + c := NewCluster(t, members) for _, m := range c.Members { m.SnapCount = 10 @@ -248,7 +253,7 @@ func TestIssue2746(t *testing.T) { clusterMustProgress(t, c.Members) } - c.RemoveMember(t, uint64(c.Members[4].s.ID())) + c.RemoveMember(t, uint64(c.Members[members-1].s.ID())) c.waitLeader(t, c.Members) c.AddMember(t)