From 8ccd4e11463ce969c1ff36185effe06eff151d85 Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Sun, 10 Jan 2021 12:46:38 +0100 Subject: [PATCH 1/6] Fix flaky tests reported due to data race on grpc logging registration. Example: ``` ================== WARNING: DATA RACE Write at 0x000002178320 by goroutine 575: google.golang.org/grpc/grpclog.SetLoggerV2() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/grpclog/loggerv2.go:70 +0x444 go.etcd.io/etcd/server/v3/embed.(*Config).setupLogging.func1.1() /home/ptab/corp/etcd/server/embed/config_logging.go:119 +0x345 sync.(*Once).doSlow() /usr/lib/google-golang/src/sync/once.go:66 +0x109 sync.(*Once).Do() /usr/lib/google-golang/src/sync/once.go:57 +0x68 go.etcd.io/etcd/server/v3/embed.(*Config).setupLogging.func1() /home/ptab/corp/etcd/server/embed/config_logging.go:109 +0x3b1 go.etcd.io/etcd/server/v3/embed.(*Config).setupLogging() /home/ptab/corp/etcd/server/embed/config_logging.go:174 +0x6af go.etcd.io/etcd/server/v3/embed.(*Config).Validate() /home/ptab/corp/etcd/server/embed/config.go:553 +0x55 go.etcd.io/etcd/server/v3/embed.StartEtcd() /home/ptab/corp/etcd/server/embed/etcd.go:93 +0x84 go.etcd.io/etcd/tests/v3/integration.TestKVWithEmptyValue() /home/ptab/corp/etcd/tests/integration/v3_kv_test.go:33 +0x18c testing.tRunner() /usr/lib/google-golang/src/testing/testing.go:1123 +0x202 Previous read at 0x000002178320 by goroutine 956: [failed to restore the stack] Goroutine 575 (running) created at: testing.(*T).Run() /usr/lib/google-golang/src/testing/testing.go:1168 +0x5bb testing.runTests.func1() /usr/lib/google-golang/src/testing/testing.go:1441 +0xa6 testing.tRunner() /usr/lib/google-golang/src/testing/testing.go:1123 +0x202 testing.runTests() /usr/lib/google-golang/src/testing/testing.go:1439 +0x612 testing.(*M).Run() /usr/lib/google-golang/src/testing/testing.go:1347 +0x3c4 go.etcd.io/etcd/pkg/v3/testutil.MustTestMainWithLeakDetection() /home/ptab/corp/etcd/pkg/testutil/leak.go:150 +0x38 go.etcd.io/etcd/tests/v3/integration.TestMain() /home/ptab/corp/etcd/tests/integration/main_test.go:14 +0x272 main.main() _testmain.go:349 +0x269 Goroutine 956 (finished) created at: google.golang.org/grpc/internal/transport.newHTTP2Server() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/internal/transport/http2_server.go:288 +0x18a4 google.golang.org/grpc/internal/transport.NewServerTransport() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/internal/transport/transport.go:534 +0x2f5 google.golang.org/grpc.(*Server).newHTTP2Transport() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:726 +0x2ca google.golang.org/grpc.(*Server).handleRawConn() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:693 +0x60f google.golang.org/grpc.(*Server).Serve.func3() /home/ptab/private/golang/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:663 +0x4c ================== ... {"level":"info","ts":"2021-01-09T22:21:04.550+0100","caller":"embed/etcd.go:330","msg":"closed etcd server","name":"default","data-dir":"/tmp/etcd-017337431","advertise-peer-urls":["http://localhost:2380"],"advertise-client-urls":["http://localhost:2379"]} --- FAIL: TestKVWithEmptyValue (1.08s) v3_kv_test.go:62: my-namespace/foobar = data v3_kv_test.go:62: my-namespace/foobar1 = data v3_kv_test.go:62: namespace/foobar1 = data v3_kv_test.go:72: foobar = data v3_kv_test.go:72: foobar1 = data v3_kv_test.go:87: delete keys:2 testing.go:1038: race detected during execution of test ``` --- server/embed/config_logging.go | 34 +++++++++++++++++++++------------- tests/integration/main_test.go | 2 ++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/server/embed/config_logging.go b/server/embed/config_logging.go index 1b0909221..7ab81191e 100644 --- a/server/embed/config_logging.go +++ b/server/embed/config_logging.go @@ -40,6 +40,26 @@ func (cfg Config) GetLogger() *zap.Logger { // for testing var grpcLogOnce = new(sync.Once) +func setupGrpcLogging(debug bool, config zap.Config) { + grpcLogOnce.Do(func() { + // debug true, enable info, warning, error + // debug false, only discard info + if debug { + var gl grpclog.LoggerV2 + gl, err := logutil.NewGRPCLoggerV2(config) + if err == nil { + grpclog.SetLoggerV2(gl) + } + } else { + grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, os.Stderr, os.Stderr)) + } + }) +} + +func SetupGrpcLoggingForTest(debug bool) { + setupGrpcLogging(debug, zap.NewDevelopmentConfig()) +} + // setupLogging initializes etcd logging. // Must be called after flag parsing or finishing configuring embed.Config. func (cfg *Config) setupLogging() error { @@ -106,19 +126,7 @@ func (cfg *Config) setupLogging() error { c.loggerConfig = &copied c.loggerCore = nil c.loggerWriteSyncer = nil - grpcLogOnce.Do(func() { - // debug true, enable info, warning, error - // debug false, only discard info - if cfg.LogLevel == "debug" { - var gl grpclog.LoggerV2 - gl, err = logutil.NewGRPCLoggerV2(copied) - if err == nil { - grpclog.SetLoggerV2(gl) - } - } else { - grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, os.Stderr, os.Stderr)) - } - }) + setupGrpcLogging(cfg.LogLevel == "debug", copied) return nil } } diff --git a/tests/integration/main_test.go b/tests/integration/main_test.go index a4c8fd237..cb47a9d78 100644 --- a/tests/integration/main_test.go +++ b/tests/integration/main_test.go @@ -8,8 +8,10 @@ import ( "testing" "go.etcd.io/etcd/pkg/v3/testutil" + "go.etcd.io/etcd/server/v3/embed" ) func TestMain(m *testing.M) { + embed.SetupGrpcLoggingForTest(true) testutil.MustTestMainWithLeakDetection(m) } From e2a65bee6e8708aa54f180289340ec40ecc165af Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Sun, 10 Jan 2021 12:47:23 +0100 Subject: [PATCH 2/6] Avoid 'interactive prompt' for root password in etcd tests. Before: ``` {"level":"info","ts":1610273495.3791487,"caller":"agent/handler.go:668","msg":"cleaning up page cache"} {"level":"info","ts":1610273495.3793094,"caller":"agent/handler.go:94","msg":"created etcd log file","path":"/tmp/etcd-functional-2/etcd.log"} {"level":"info","ts":1610273495.379328,"caller":"agent/handler.go:668","msg":"cleaning up page cache"} [sudo] password for ptab: pam_glogin: invalid password Sorry, try again. [sudo] password for ptab: ``` Now the caches are dropped if the current users is in sudoers, bot not in the other cases. To be honest I don't see the purpose for dropping the caches at all in the test. --- tests/functional/agent/handler.go | 1 + tests/functional/agent/utils.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/functional/agent/handler.go b/tests/functional/agent/handler.go index e59c9038b..9a2962604 100644 --- a/tests/functional/agent/handler.go +++ b/tests/functional/agent/handler.go @@ -665,6 +665,7 @@ func (srv *Server) handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA() (*rpcpb.Response, erro } } + // TODO: Verify whether this cleaning of 'cache pages' is needed. srv.lg.Info("cleaning up page cache") if err := cleanPageCache(); err != nil { srv.lg.Warn("failed to clean up page cache", zap.String("error", err.Error())) diff --git a/tests/functional/agent/utils.go b/tests/functional/agent/utils.go index 45ea66333..d77fce60b 100644 --- a/tests/functional/agent/utils.go +++ b/tests/functional/agent/utils.go @@ -103,6 +103,6 @@ func copyFile(src, dst string) error { func cleanPageCache() error { // https://www.kernel.org/doc/Documentation/sysctl/vm.txt // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c - cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`) + cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo -s -n`) return cmd.Run() } From 26f9b4be8fd78a3d3b2bc4cf6d118302c4bb7328 Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Sun, 10 Jan 2021 23:58:31 +0100 Subject: [PATCH 3/6] e2e tests were leaking 'defunc' etcdctl processes. The commit ensures that spawned etcdctl processes are "closed", so they perform proper os wait processing. This might have contributed to file-descriptor/open-files limit being exceeded. --- pkg/testutil/assert.go | 5 +++++ tests/e2e/ctl_v3_auth_test.go | 4 +++- tests/e2e/ctl_v3_elect_test.go | 4 ---- tests/e2e/ctl_v3_lock_test.go | 4 ---- tests/e2e/ctl_v3_role_test.go | 3 ++- tests/e2e/ctl_v3_snapshot_test.go | 8 ++++++++ tests/e2e/ctl_v3_user_test.go | 1 + tests/e2e/v3_curl_test.go | 1 + 8 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pkg/testutil/assert.go b/pkg/testutil/assert.go index 9cf03457d..e8e042021 100644 --- a/pkg/testutil/assert.go +++ b/pkg/testutil/assert.go @@ -21,6 +21,7 @@ import ( ) func AssertEqual(t *testing.T, e, a interface{}, msg ...string) { + t.Helper() if (e == nil || a == nil) && (isNil(e) && isNil(a)) { return } @@ -36,20 +37,24 @@ func AssertEqual(t *testing.T, e, a interface{}, msg ...string) { } func AssertNil(t *testing.T, v interface{}) { + t.Helper() AssertEqual(t, nil, v) } func AssertNotNil(t *testing.T, v interface{}) { + t.Helper() if v == nil { t.Fatalf("expected non-nil, got %+v", v) } } func AssertTrue(t *testing.T, v bool, msg ...string) { + t.Helper() AssertEqual(t, true, v, msg...) } func AssertFalse(t *testing.T, v bool, msg ...string) { + t.Helper() AssertEqual(t, false, v, msg...) } diff --git a/tests/e2e/ctl_v3_auth_test.go b/tests/e2e/ctl_v3_auth_test.go index 79fe3f662..a0eab7cbe 100644 --- a/tests/e2e/ctl_v3_auth_test.go +++ b/tests/e2e/ctl_v3_auth_test.go @@ -25,7 +25,9 @@ import ( "go.etcd.io/etcd/client/v3" ) -func TestCtlV3AuthEnable(t *testing.T) { testCtl(t, authEnableTest) } +func TestCtlV3AuthEnable(t *testing.T) { + testCtl(t, authEnableTest) +} func TestCtlV3AuthDisable(t *testing.T) { testCtl(t, authDisableTest) } func TestCtlV3AuthStatus(t *testing.T) { testCtl(t, authStatusTest) } func TestCtlV3AuthWriteKey(t *testing.T) { testCtl(t, authCredWriteKeyTest) } diff --git a/tests/e2e/ctl_v3_elect_test.go b/tests/e2e/ctl_v3_elect_test.go index c3fb90ac1..332ce9708 100644 --- a/tests/e2e/ctl_v3_elect_test.go +++ b/tests/e2e/ctl_v3_elect_test.go @@ -24,10 +24,6 @@ import ( ) func TestCtlV3Elect(t *testing.T) { - oldenv := os.Getenv("EXPECT_DEBUG") - defer os.Setenv("EXPECT_DEBUG", oldenv) - os.Setenv("EXPECT_DEBUG", "1") - testCtl(t, testElect) } diff --git a/tests/e2e/ctl_v3_lock_test.go b/tests/e2e/ctl_v3_lock_test.go index 2bcc5a823..8a1598735 100644 --- a/tests/e2e/ctl_v3_lock_test.go +++ b/tests/e2e/ctl_v3_lock_test.go @@ -24,10 +24,6 @@ import ( ) func TestCtlV3Lock(t *testing.T) { - oldenv := os.Getenv("EXPECT_DEBUG") - defer os.Setenv("EXPECT_DEBUG", oldenv) - os.Setenv("EXPECT_DEBUG", "1") - testCtl(t, testLock) } diff --git a/tests/e2e/ctl_v3_role_test.go b/tests/e2e/ctl_v3_role_test.go index b7c8a5446..fb4e5de60 100644 --- a/tests/e2e/ctl_v3_role_test.go +++ b/tests/e2e/ctl_v3_role_test.go @@ -114,6 +114,7 @@ func ctlV3RoleGrantPermission(cx ctlCtx, rolename string, perm grantingPerm) err if err != nil { return err } + defer proc.Close() expStr := fmt.Sprintf("Role %s updated", rolename) _, err = proc.Expect(expStr) @@ -139,7 +140,7 @@ func ctlV3RoleRevokePermission(cx ctlCtx, rolename string, key, rangeEnd string, if err != nil { return err } - + defer proc.Close() _, err = proc.Expect(expStr) return err } diff --git a/tests/e2e/ctl_v3_snapshot_test.go b/tests/e2e/ctl_v3_snapshot_test.go index 557df0dd3..5e508d27d 100644 --- a/tests/e2e/ctl_v3_snapshot_test.go +++ b/tests/e2e/ctl_v3_snapshot_test.go @@ -154,6 +154,14 @@ func getSnapshotStatus(cx ctlCtx, fpath string) (snapshot.Status, error) { // TestIssue6361 ensures new member that starts with snapshot correctly // syncs up with other members and serve correct data. func TestIssue6361(t *testing.T) { + { + // This tests is pretty flaky on semaphoreci as of 2021-01-10. + // TODO: Remove when the flakiness source is identified. + oldenv := os.Getenv("EXPECT_DEBUG") + defer os.Setenv("EXPECT_DEBUG", oldenv) + os.Setenv("EXPECT_DEBUG", "1") + } + defer testutil.AfterTest(t) os.Setenv("ETCDCTL_API", "3") defer os.Unsetenv("ETCDCTL_API") diff --git a/tests/e2e/ctl_v3_user_test.go b/tests/e2e/ctl_v3_user_test.go index db31d8774..8672ae7b6 100644 --- a/tests/e2e/ctl_v3_user_test.go +++ b/tests/e2e/ctl_v3_user_test.go @@ -183,6 +183,7 @@ func ctlV3User(cx ctlCtx, args []string, expStr string, stdIn []string) error { if err != nil { return err } + defer proc.Close() // Send 'stdIn' strings as input. for _, s := range stdIn { diff --git a/tests/e2e/v3_curl_test.go b/tests/e2e/v3_curl_test.go index 7c597b8c1..09a0d1d8a 100644 --- a/tests/e2e/v3_curl_test.go +++ b/tests/e2e/v3_curl_test.go @@ -246,6 +246,7 @@ func testV3CurlAuth(cx ctlCtx) { cmdArgs = cURLPrefixArgs(cx.epc, "POST", cURLReq{endpoint: path.Join(p, "/auth/authenticate"), value: string(authreq)}) proc, err := spawnCmd(cmdArgs) testutil.AssertNil(cx.t, err) + defer proc.Close() cURLRes, err := proc.ExpectFunc(lineFunc) testutil.AssertNil(cx.t, err) From 74274f441757e067eba1a7b705fa6b9204efb3a6 Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Mon, 11 Jan 2021 00:20:16 +0100 Subject: [PATCH 4/6] e2e: Adding better diagnostic and location for temporary files to Snapshot tests. --- .../api/v2store/node_extern_test.go | 6 +++ tests/e2e/ctl_v3_snapshot_test.go | 53 +++++++++++-------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/server/etcdserver/api/v2store/node_extern_test.go b/server/etcdserver/api/v2store/node_extern_test.go index 067cdc42d..f5f99951a 100644 --- a/server/etcdserver/api/v2store/node_extern_test.go +++ b/server/etcdserver/api/v2store/node_extern_test.go @@ -102,7 +102,13 @@ func TestNodeExternClone(t *testing.T) { } func sameSlice(a, b []*NodeExtern) bool { +<<<<<<< HEAD ah := (*reflect.SliceHeader)(unsafe.Pointer(&a)) bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) return *ah == *bh +======= + va := reflect.ValueOf(a) + vb := reflect.ValueOf(b) + return va.Len() == vb.Len() && va.Pointer() == vb.Pointer() +>>>>>>> 6f8273a2a (fixup! e2e: Adding better diagnostic and location for temporary files to Snapshot tests.) } diff --git a/tests/e2e/ctl_v3_snapshot_test.go b/tests/e2e/ctl_v3_snapshot_test.go index 5e508d27d..74cb4ee50 100644 --- a/tests/e2e/ctl_v3_snapshot_test.go +++ b/tests/e2e/ctl_v3_snapshot_test.go @@ -18,7 +18,7 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" + "math/rand" "os" "path/filepath" "strings" @@ -32,6 +32,13 @@ import ( func TestCtlV3Snapshot(t *testing.T) { testCtl(t, snapshotTest) } +// TODO: Replace with testing.T.TestDir() in golang-1.15. +func tempDir(tb testing.TB) string { + dir := filepath.Join(os.TempDir(), tb.Name(), fmt.Sprint(rand.Int())) + os.MkdirAll(dir, 0700) + return dir +} + func snapshotTest(cx ctlCtx) { maintenanceInitKeys(cx) @@ -43,7 +50,7 @@ func snapshotTest(cx ctlCtx) { cx.t.Fatalf("snapshot: ctlV3Put error (%v)", err) } - fpath := "test1.snapshot" + fpath := filepath.Join(tempDir(cx.t), "snapshot") defer os.RemoveAll(fpath) if err = ctlV3SnapshotSave(cx, fpath); err != nil { @@ -65,7 +72,7 @@ func snapshotTest(cx ctlCtx) { func TestCtlV3SnapshotCorrupt(t *testing.T) { testCtl(t, snapshotCorruptTest) } func snapshotCorruptTest(cx ctlCtx) { - fpath := "test2.snapshot" + fpath := filepath.Join(tempDir(cx.t), "snapshot") defer os.RemoveAll(fpath) if err := ctlV3SnapshotSave(cx, fpath); err != nil { @@ -82,10 +89,12 @@ func snapshotCorruptTest(cx ctlCtx) { } f.Close() - defer os.RemoveAll("snap.etcd") + datadir := filepath.Join(tempDir(cx.t), "data") + defer os.RemoveAll(datadir) + serr := spawnWithExpect( append(cx.PrefixArgs(), "snapshot", "restore", - "--data-dir", "snap.etcd", + "--data-dir", datadir, fpath), "expected sha256") @@ -98,7 +107,7 @@ func snapshotCorruptTest(cx ctlCtx) { func TestCtlV3SnapshotStatusBeforeRestore(t *testing.T) { testCtl(t, snapshotStatusBeforeRestoreTest) } func snapshotStatusBeforeRestoreTest(cx ctlCtx) { - fpath := "test3.snapshot" + fpath := filepath.Join(tempDir(cx.t), "snapshot") defer os.RemoveAll(fpath) if err := ctlV3SnapshotSave(cx, fpath); err != nil { @@ -111,10 +120,11 @@ func snapshotStatusBeforeRestoreTest(cx ctlCtx) { cx.t.Fatalf("snapshotTest getSnapshotStatus error (%v)", err) } - defer os.RemoveAll("snap.etcd") + dataDir := filepath.Join(tempDir(cx.t), "data") + defer os.RemoveAll(dataDir) serr := spawnWithExpect( append(cx.PrefixArgs(), "snapshot", "restore", - "--data-dir", "snap.etcd", + "--data-dir", dataDir, fpath), "added member") if serr != nil { @@ -183,7 +193,7 @@ func TestIssue6361(t *testing.T) { dialTimeout := 10 * time.Second prefixArgs := []string{ctlBinPath, "--endpoints", strings.Join(epc.EndpointsV3(), ","), "--dial-timeout", dialTimeout.String()} - // write some keys + t.Log("Writing some keys...") kvs := []kv{{"foo1", "val1"}, {"foo2", "val2"}, {"foo3", "val3"}} for i := range kvs { if err = spawnWithExpect(append(prefixArgs, "put", kvs[i].key, kvs[i].val), "OK"); err != nil { @@ -191,28 +201,29 @@ func TestIssue6361(t *testing.T) { } } - fpath := filepath.Join(os.TempDir(), "test.snapshot") + fpath := filepath.Join(tempDir(t), "snapshot") defer os.RemoveAll(fpath) - // etcdctl save snapshot + t.Log("etcdctl saving snapshot...") if err = spawnWithExpect(append(prefixArgs, "snapshot", "save", fpath), fmt.Sprintf("Snapshot saved at %s", fpath)); err != nil { t.Fatal(err) } + t.Log("Stopping the original server...") if err = epc.procs[0].Stop(); err != nil { t.Fatal(err) } - newDataDir := filepath.Join(os.TempDir(), "test.data") + newDataDir := tempDir(t) defer os.RemoveAll(newDataDir) - // etcdctl restore the snapshot + t.Log("etcdctl restoring the snapshot...") err = spawnWithExpect([]string{ctlBinPath, "snapshot", "restore", fpath, "--name", epc.procs[0].Config().name, "--initial-cluster", epc.procs[0].Config().initialCluster, "--initial-cluster-token", epc.procs[0].Config().initialToken, "--initial-advertise-peer-urls", epc.procs[0].Config().purl.String(), "--data-dir", newDataDir}, "added member") if err != nil { t.Fatal(err) } - // start the etcd member using the restored snapshot + t.Log("Start the etcd member using the restored snapshot...") epc.procs[0].Config().dataDirPath = newDataDir for i := range epc.procs[0].Config().args { if epc.procs[0].Config().args[i] == "--data-dir" { @@ -223,14 +234,14 @@ func TestIssue6361(t *testing.T) { t.Fatal(err) } - // ensure the restored member has the correct data + t.Log("Ensuring the restored member has the correct data...") for i := range kvs { if err = spawnWithExpect(append(prefixArgs, "get", kvs[i].key), kvs[i].val); err != nil { t.Fatal(err) } } - // add a new member into the cluster + t.Log("Adding new member into the cluster") clientURL := fmt.Sprintf("http://localhost:%d", etcdProcessBasePort+30) peerURL := fmt.Sprintf("http://localhost:%d", etcdProcessBasePort+31) err = spawnWithExpect(append(prefixArgs, "member", "add", "newmember", fmt.Sprintf("--peer-urls=%s", peerURL)), " added to cluster ") @@ -238,16 +249,13 @@ func TestIssue6361(t *testing.T) { t.Fatal(err) } - var newDataDir2 string - newDataDir2, err = ioutil.TempDir("", "newdata2") - if err != nil { - t.Fatal(err) - } + newDataDir2 := filepath.Join(tempDir(t), "newdata") defer os.RemoveAll(newDataDir2) name2 := "infra2" initialCluster2 := epc.procs[0].Config().initialCluster + fmt.Sprintf(",%s=%s", name2, peerURL) + t.Log("Starting new member") // start the new member var nepc *expect.ExpectProcess nepc, err = spawnCmd([]string{epc.procs[0].Config().execPath, "--name", name2, @@ -263,13 +271,14 @@ func TestIssue6361(t *testing.T) { prefixArgs = []string{ctlBinPath, "--endpoints", clientURL, "--dial-timeout", dialTimeout.String()} - // ensure added member has data from incoming snapshot + t.Log("Ensuring added member has data from incoming snapshot...") for i := range kvs { if err = spawnWithExpect(append(prefixArgs, "get", kvs[i].key), kvs[i].val); err != nil { t.Fatal(err) } } + t.Log("Stopping the second member") if err = nepc.Stop(); err != nil { t.Fatal(err) } From 70b5ef1d3af18926e0228a94d562376a89afd358 Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Mon, 11 Jan 2021 12:01:22 +0100 Subject: [PATCH 5/6] Fix tests flakiness: in particular TestIssue6361. The root reason of flakes, was that server was considered as ready to early. In particular: ``` ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:44.474+0100","caller":"rafthttp/stream.go:274","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"ed5f620d34a8e61b","remote-peer-id":"ca50e9357181d758"} ../../bin/etcd-2456648: {"level":"warn","ts":"2021-01-11T09:56:49.040+0100","caller":"etcdserver/server.go:1942","msg":"failed to publish local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","publish-timeout":"7s","error":"etcdserver: request timed out, possibly due to connection lost"} ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:49.049+0100","caller":"etcdserver/server.go:1921","msg":"published local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","cluster-id":"34f27e83b3bc2ff","publish-timeout":"7s"} ``` was taking 5s. If this was happening concurrently with etcdctl, the etcdctl could timeout. The fix, requires servers to report 'ready to serve client requests' to consider them up. Fixed also some whitelisted 'goroutines'. --- pkg/testutil/leak.go | 2 ++ tests/e2e/ctl_v3_auth_test.go | 2 +- tests/e2e/ctl_v3_snapshot_test.go | 6 +++--- tests/e2e/ctl_v3_test.go | 4 ++-- tests/e2e/etcd_process.go | 2 +- tests/e2e/v3_curl_test.go | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/testutil/leak.go b/pkg/testutil/leak.go index 0d85c0c7c..4b7a4a9b5 100644 --- a/pkg/testutil/leak.go +++ b/pkg/testutil/leak.go @@ -111,10 +111,12 @@ func interestingGoroutines() (gs []string) { if stack == "" || strings.Contains(stack, "sync.(*WaitGroup).Done") || strings.Contains(stack, "os.(*file).close") || + strings.Contains(stack, "os.(*Process).Release") || strings.Contains(stack, "created by os/signal.init") || strings.Contains(stack, "runtime/panic.go") || strings.Contains(stack, "created by testing.RunTests") || strings.Contains(stack, "created by testing.runTests") || + strings.Contains(stack, "created by testing.(*T).Run") || strings.Contains(stack, "testing.Main(") || strings.Contains(stack, "runtime.goexit") || strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/testutil.interestingGoroutines") || diff --git a/tests/e2e/ctl_v3_auth_test.go b/tests/e2e/ctl_v3_auth_test.go index a0eab7cbe..bee66e6d9 100644 --- a/tests/e2e/ctl_v3_auth_test.go +++ b/tests/e2e/ctl_v3_auth_test.go @@ -25,7 +25,7 @@ import ( "go.etcd.io/etcd/client/v3" ) -func TestCtlV3AuthEnable(t *testing.T) { +func TestCtlV3AuthEnable(t *testing.T) { testCtl(t, authEnableTest) } func TestCtlV3AuthDisable(t *testing.T) { testCtl(t, authDisableTest) } diff --git a/tests/e2e/ctl_v3_snapshot_test.go b/tests/e2e/ctl_v3_snapshot_test.go index 74cb4ee50..4bcb3123d 100644 --- a/tests/e2e/ctl_v3_snapshot_test.go +++ b/tests/e2e/ctl_v3_snapshot_test.go @@ -223,7 +223,7 @@ func TestIssue6361(t *testing.T) { t.Fatal(err) } - t.Log("Start the etcd member using the restored snapshot...") + t.Log("(Re)starting the etcd member using the restored snapshot...") epc.procs[0].Config().dataDirPath = newDataDir for i := range epc.procs[0].Config().args { if epc.procs[0].Config().args[i] == "--data-dir" { @@ -255,7 +255,7 @@ func TestIssue6361(t *testing.T) { name2 := "infra2" initialCluster2 := epc.procs[0].Config().initialCluster + fmt.Sprintf(",%s=%s", name2, peerURL) - t.Log("Starting new member") + t.Log("Starting the new member") // start the new member var nepc *expect.ExpectProcess nepc, err = spawnCmd([]string{epc.procs[0].Config().execPath, "--name", name2, @@ -265,7 +265,7 @@ func TestIssue6361(t *testing.T) { if err != nil { t.Fatal(err) } - if _, err = nepc.Expect("enabled capabilities for version"); err != nil { + if _, err = nepc.Expect("ready to serve client requests"); err != nil { t.Fatal(err) } diff --git a/tests/e2e/ctl_v3_test.go b/tests/e2e/ctl_v3_test.go index 3900446bc..98b93783d 100644 --- a/tests/e2e/ctl_v3_test.go +++ b/tests/e2e/ctl_v3_test.go @@ -88,10 +88,10 @@ func versionTest(cx ctlCtx) { func clusterVersionTest(cx ctlCtx, expected string) { var err error - for i := 0; i < 7; i++ { + for i := 0; i < 35; i++ { if err = cURLGet(cx.epc, cURLReq{endpoint: "/version", expected: expected}); err != nil { cx.t.Logf("#%d: v3 is not ready yet (%v)", i, err) - time.Sleep(time.Second) + time.Sleep(200 * time.Millisecond) continue } break diff --git a/tests/e2e/etcd_process.go b/tests/e2e/etcd_process.go index 74ca20d71..a07f8438d 100644 --- a/tests/e2e/etcd_process.go +++ b/tests/e2e/etcd_process.go @@ -24,7 +24,7 @@ import ( ) var ( - etcdServerReadyLines = []string{"enabled capabilities for version", "published"} + etcdServerReadyLines = []string{"enabled capabilities for version", "published", "ready to serve client requests"} binPath string ctlBinPath string ) diff --git a/tests/e2e/v3_curl_test.go b/tests/e2e/v3_curl_test.go index 09a0d1d8a..6abe8749e 100644 --- a/tests/e2e/v3_curl_test.go +++ b/tests/e2e/v3_curl_test.go @@ -246,7 +246,7 @@ func testV3CurlAuth(cx ctlCtx) { cmdArgs = cURLPrefixArgs(cx.epc, "POST", cURLReq{endpoint: path.Join(p, "/auth/authenticate"), value: string(authreq)}) proc, err := spawnCmd(cmdArgs) testutil.AssertNil(cx.t, err) - defer proc.Close() + defer proc.Close() cURLRes, err := proc.ExpectFunc(lineFunc) testutil.AssertNil(cx.t, err) From 0d9cfc11c8d03fb9bc6e7c7054c26b9e3057c3ba Mon Sep 17 00:00:00 2001 From: Piotr Tabor Date: Mon, 11 Jan 2021 23:24:01 +0100 Subject: [PATCH 6/6] Fix usage of reflect.SliceHeader: reported by vet on tip golang Example: https://travis-ci.com/github/etcd-io/etcd/jobs/470404938 ``` % (cd server && go vet ./...) stderr: # go.etcd.io/etcd/server/v3/etcdserver/api/v2store stderr: etcdserver/api/v2store/node_extern_test.go:107:9: possible misuse of reflect.SliceHeader stderr: etcdserver/api/v2store/node_extern_test.go:107:16: possible misuse of reflect.SliceHeader ``` --- server/etcdserver/api/v2store/node_extern_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/server/etcdserver/api/v2store/node_extern_test.go b/server/etcdserver/api/v2store/node_extern_test.go index f5f99951a..48daeb648 100644 --- a/server/etcdserver/api/v2store/node_extern_test.go +++ b/server/etcdserver/api/v2store/node_extern_test.go @@ -18,7 +18,6 @@ import ( "reflect" "testing" "time" - "unsafe" "go.etcd.io/etcd/pkg/v3/testutil" ) @@ -102,13 +101,7 @@ func TestNodeExternClone(t *testing.T) { } func sameSlice(a, b []*NodeExtern) bool { -<<<<<<< HEAD - ah := (*reflect.SliceHeader)(unsafe.Pointer(&a)) - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - return *ah == *bh -======= va := reflect.ValueOf(a) vb := reflect.ValueOf(b) return va.Len() == vb.Len() && va.Pointer() == vb.Pointer() ->>>>>>> 6f8273a2a (fixup! e2e: Adding better diagnostic and location for temporary files to Snapshot tests.) }