From d814e9dc3571e27ab305859ce46fc8b08468c0e0 Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Thu, 21 Apr 2016 20:47:42 -0700 Subject: [PATCH 1/2] integration: wait for ReadyNotify in Issue3699 test Fixes #5147 --- integration/cluster.go | 2 +- integration/cluster_test.go | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/integration/cluster.go b/integration/cluster.go index 4798aeb70..aa9855bc1 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -329,11 +329,11 @@ func (c *cluster) waitLeader(t *testing.T, membs []*member) int { } if lead != 0 && lead != m.s.Lead() { lead = 0 + time.Sleep(10 * tickDuration) break } lead = m.s.Lead() } - time.Sleep(10 * tickDuration) } for i, m := range membs { diff --git a/integration/cluster_test.go b/integration/cluster_test.go index 4d7e9e033..69f9d426c 100644 --- a/integration/cluster_test.go +++ b/integration/cluster_test.go @@ -21,6 +21,7 @@ import ( "os" "strconv" "testing" + "time" "github.com/coreos/etcd/client" "github.com/coreos/etcd/pkg/testutil" @@ -301,7 +302,6 @@ func TestIssue3699(t *testing.T) { // make node a unavailable c.Members[0].Stop(t) - <-c.Members[0].s.StopNotify() // add node d c.AddMember(t) @@ -317,11 +317,16 @@ func TestIssue3699(t *testing.T) { // bring back node a // node a will remain useless as long as d is the leader. - err := c.Members[0].Restart(t) + if err := c.Members[0].Restart(t); err != nil { + t.Fatal(err) + } select { + // waiting for ReadyNotify can take several seconds + case <-time.After(10 * time.Second): + t.Fatalf("waited too long for ready notification") case <-c.Members[0].s.StopNotify(): t.Fatalf("should not be stopped") - default: + case <-c.Members[0].s.ReadyNotify(): } // must waitLeader so goroutines don't leak on terminate c.waitLeader(t, c.Members) @@ -330,11 +335,10 @@ func TestIssue3699(t *testing.T) { cc := mustNewHTTPClient(t, []string{c.URL(0)}, c.cfg.ClientTLS) kapi := client.NewKeysAPI(cc) ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) - _, err = kapi.Set(ctx, "/foo", "bar", nil) - cancel() - if err != nil { + if _, err := kapi.Set(ctx, "/foo", "bar", nil); err != nil { t.Fatalf("unexpected error on Set (%v)", err) } + cancel() } // clusterMustProgress ensures that cluster can make progress. It creates From 8291110049797716d2dd51251f0b05dfe8fb07f5 Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Wed, 27 Apr 2016 22:05:59 -0700 Subject: [PATCH 2/2] rafthttp: do not create new connections after stopping transport --- rafthttp/transport.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rafthttp/transport.go b/rafthttp/transport.go index 3c4ce3c19..66f83c9a2 100644 --- a/rafthttp/transport.go +++ b/rafthttp/transport.go @@ -202,11 +202,19 @@ func (t *Transport) Stop() { if tr, ok := t.pipelineRt.(*http.Transport); ok { tr.CloseIdleConnections() } + t.peers = nil + t.remotes = nil } func (t *Transport) AddRemote(id types.ID, us []string) { t.mu.Lock() defer t.mu.Unlock() + if t.remotes == nil { + // there's no clean way to shutdown the golang http server + // (see: https://github.com/golang/go/issues/4674) before + // stopping the transport; ignore any new connections. + return + } if _, ok := t.peers[id]; ok { return } @@ -223,6 +231,9 @@ func (t *Transport) AddRemote(id types.ID, us []string) { func (t *Transport) AddPeer(id types.ID, us []string) { t.mu.Lock() defer t.mu.Unlock() + if t.peers == nil { + panic("transport stopped") + } if _, ok := t.peers[id]; ok { return }