From 70b5ef1d3af18926e0228a94d562376a89afd358 Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Mon, 11 Jan 2021 12:01:22 +0100 Subject: [PATCH] Fix tests flakiness: in particular TestIssue6361. The root reason of flakes, was that server was considered as ready to early. In particular: ``` ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:44.474+0100","caller":"rafthttp/stream.go:274","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"ed5f620d34a8e61b","remote-peer-id":"ca50e9357181d758"} ../../bin/etcd-2456648: {"level":"warn","ts":"2021-01-11T09:56:49.040+0100","caller":"etcdserver/server.go:1942","msg":"failed to publish local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","publish-timeout":"7s","error":"etcdserver: request timed out, possibly due to connection lost"} ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:49.049+0100","caller":"etcdserver/server.go:1921","msg":"published local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","cluster-id":"34f27e83b3bc2ff","publish-timeout":"7s"} ``` was taking 5s. If this was happening concurrently with etcdctl, the etcdctl could timeout. The fix, requires servers to report 'ready to serve client requests' to consider them up. Fixed also some whitelisted 'goroutines'. --- pkg/testutil/leak.go | 2 ++ tests/e2e/ctl_v3_auth_test.go | 2 +- tests/e2e/ctl_v3_snapshot_test.go | 6 +++--- tests/e2e/ctl_v3_test.go | 4 ++-- tests/e2e/etcd_process.go | 2 +- tests/e2e/v3_curl_test.go | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/testutil/leak.go b/pkg/testutil/leak.go index 0d85c0c7c..4b7a4a9b5 100644 --- a/pkg/testutil/leak.go +++ b/pkg/testutil/leak.go @@ -111,10 +111,12 @@ func interestingGoroutines() (gs []string) { if stack == "" || strings.Contains(stack, "sync.(*WaitGroup).Done") || strings.Contains(stack, "os.(*file).close") || + strings.Contains(stack, "os.(*Process).Release") || strings.Contains(stack, "created by os/signal.init") || strings.Contains(stack, "runtime/panic.go") || strings.Contains(stack, "created by testing.RunTests") || strings.Contains(stack, "created by testing.runTests") || + strings.Contains(stack, "created by testing.(*T).Run") || strings.Contains(stack, "testing.Main(") || strings.Contains(stack, "runtime.goexit") || strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/testutil.interestingGoroutines") || diff --git a/tests/e2e/ctl_v3_auth_test.go b/tests/e2e/ctl_v3_auth_test.go index a0eab7cbe..bee66e6d9 100644 --- a/tests/e2e/ctl_v3_auth_test.go +++ b/tests/e2e/ctl_v3_auth_test.go @@ -25,7 +25,7 @@ import ( "go.etcd.io/etcd/client/v3" ) -func TestCtlV3AuthEnable(t *testing.T) { +func TestCtlV3AuthEnable(t *testing.T) { testCtl(t, authEnableTest) } func TestCtlV3AuthDisable(t *testing.T) { testCtl(t, authDisableTest) } diff --git a/tests/e2e/ctl_v3_snapshot_test.go b/tests/e2e/ctl_v3_snapshot_test.go index 74cb4ee50..4bcb3123d 100644 --- a/tests/e2e/ctl_v3_snapshot_test.go +++ b/tests/e2e/ctl_v3_snapshot_test.go @@ -223,7 +223,7 @@ func TestIssue6361(t *testing.T) { t.Fatal(err) } - t.Log("Start the etcd member using the restored snapshot...") + t.Log("(Re)starting the etcd member using the restored snapshot...") epc.procs[0].Config().dataDirPath = newDataDir for i := range epc.procs[0].Config().args { if epc.procs[0].Config().args[i] == "--data-dir" { @@ -255,7 +255,7 @@ func TestIssue6361(t *testing.T) { name2 := "infra2" initialCluster2 := epc.procs[0].Config().initialCluster + fmt.Sprintf(",%s=%s", name2, peerURL) - t.Log("Starting new member") + t.Log("Starting the new member") // start the new member var nepc *expect.ExpectProcess nepc, err = spawnCmd([]string{epc.procs[0].Config().execPath, "--name", name2, @@ -265,7 +265,7 @@ func TestIssue6361(t *testing.T) { if err != nil { t.Fatal(err) } - if _, err = nepc.Expect("enabled capabilities for version"); err != nil { + if _, err = nepc.Expect("ready to serve client requests"); err != nil { t.Fatal(err) } diff --git a/tests/e2e/ctl_v3_test.go b/tests/e2e/ctl_v3_test.go index 3900446bc..98b93783d 100644 --- a/tests/e2e/ctl_v3_test.go +++ b/tests/e2e/ctl_v3_test.go @@ -88,10 +88,10 @@ func versionTest(cx ctlCtx) { func clusterVersionTest(cx ctlCtx, expected string) { var err error - for i := 0; i < 7; i++ { + for i := 0; i < 35; i++ { if err = cURLGet(cx.epc, cURLReq{endpoint: "/version", expected: expected}); err != nil { cx.t.Logf("#%d: v3 is not ready yet (%v)", i, err) - time.Sleep(time.Second) + time.Sleep(200 * time.Millisecond) continue } break diff --git a/tests/e2e/etcd_process.go b/tests/e2e/etcd_process.go index 74ca20d71..a07f8438d 100644 --- a/tests/e2e/etcd_process.go +++ b/tests/e2e/etcd_process.go @@ -24,7 +24,7 @@ import ( ) var ( - etcdServerReadyLines = []string{"enabled capabilities for version", "published"} + etcdServerReadyLines = []string{"enabled capabilities for version", "published", "ready to serve client requests"} binPath string ctlBinPath string ) diff --git a/tests/e2e/v3_curl_test.go b/tests/e2e/v3_curl_test.go index 09a0d1d8a..6abe8749e 100644 --- a/tests/e2e/v3_curl_test.go +++ b/tests/e2e/v3_curl_test.go @@ -246,7 +246,7 @@ func testV3CurlAuth(cx ctlCtx) { cmdArgs = cURLPrefixArgs(cx.epc, "POST", cURLReq{endpoint: path.Join(p, "/auth/authenticate"), value: string(authreq)}) proc, err := spawnCmd(cmdArgs) testutil.AssertNil(cx.t, err) - defer proc.Close() + defer proc.Close() cURLRes, err := proc.ExpectFunc(lineFunc) testutil.AssertNil(cx.t, err)