From b922a286220f2348ca8662dbf8dc7203cde57f66 Mon Sep 17 00:00:00 2001
From: Sergey Kacheev
Date: Mon, 19 Jul 2021 01:31:21 +0700
Subject: [PATCH 01/43] netutil: add url comparison without resolver to URLStringsEqual

If one of the nodes in the cluster has lost its DNS record, restarting
the second node will break it. This PR adds a comparison that does not
use a resolver, which protects the cluster from DNS errors and does not
break the current logic of comparing URLs in the URLStringsEqual
function. You can read more in issue #7798

Fixes #7798
---
 pkg/netutil/netutil.go      | 42 ++++++++++++++++++++++++-------------
 pkg/netutil/netutil_test.go | 33 ++++++++++++++++++++++++-----
 2 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/pkg/netutil/netutil.go b/pkg/netutil/netutil.go
index bf737a4d9..cda090034 100644
--- a/pkg/netutil/netutil.go
+++ b/pkg/netutil/netutil.go
@@ -174,21 +174,13 @@ func URLStringsEqual(ctx context.Context, lg *zap.Logger, a []string, b []string
 	if len(a) != len(b) {
 		return false, fmt.Errorf("len(%q) != len(%q)", a, b)
 	}
-	urlsA := make([]url.URL, 0)
-	for _, str := range a {
-		u, err := url.Parse(str)
-		if err != nil {
-			return false, fmt.Errorf("failed to parse %q", str)
-		}
-		urlsA = append(urlsA, *u)
+	urlsA, err := stringsToURLs(a)
+	if err != nil {
+		return false, err
 	}
-	urlsB := make([]url.URL, 0)
-	for _, str := range b {
-		u, err := url.Parse(str)
-		if err != nil {
-			return false, fmt.Errorf("failed to parse %q", str)
-		}
-		urlsB = append(urlsB, *u)
+	urlsB, err := stringsToURLs(b)
+	if err != nil {
+		return false, err
 	}
 	if lg == nil {
 		lg, _ = zap.NewProduction()
@@ -196,7 +188,15 @@ func URLStringsEqual(ctx context.Context, lg *zap.Logger, a []string, b []string
 			lg = zap.NewExample()
 		}
 	}
-	return urlsEqual(ctx, lg, urlsA, urlsB)
+	sort.Sort(types.URLs(urlsA))
+	sort.Sort(types.URLs(urlsB))
+	for i := range urlsA {
+		if !reflect.DeepEqual(urlsA[i], urlsB[i]) {
+			// If urls are not equal, try to resolve it and compare again.
+ return urlsEqual(ctx, lg, urlsA, urlsB) + } + } + return true, nil } func urlsToStrings(us []url.URL) []string { @@ -207,6 +207,18 @@ func urlsToStrings(us []url.URL) []string { return rs } +func stringsToURLs(us []string) ([]url.URL, error) { + urls := make([]url.URL, 0, len(us)) + for _, str := range us { + u, err := url.Parse(str) + if err != nil { + return nil, fmt.Errorf("failed to parse %q", str) + } + urls = append(urls, *u) + } + return urls, nil +} + func IsNetworkTimeoutError(err error) bool { nerr, ok := err.(net.Error) return ok && nerr.Timeout() diff --git a/pkg/netutil/netutil_test.go b/pkg/netutil/netutil_test.go index 42b05ca29..7d1d17aa2 100644 --- a/pkg/netutil/netutil_test.go +++ b/pkg/netutil/netutil_test.go @@ -17,6 +17,7 @@ package netutil import ( "context" "errors" + "fmt" "net" "net/url" "reflect" @@ -292,11 +293,33 @@ func TestURLsEqual(t *testing.T) { } } func TestURLStringsEqual(t *testing.T) { - result, err := URLStringsEqual(context.TODO(), zap.NewExample(), []string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"}) - if !result { - t.Errorf("unexpected result %v", result) + defer func() { resolveTCPAddr = resolveTCPAddrDefault }() + errOnResolve := func(ctx context.Context, addr string) (*net.TCPAddr, error) { + return nil, fmt.Errorf("unexpected attempt to resolve: %q", addr) } - if err != nil { - t.Errorf("unexpected error %v", err) + cases := []struct { + urlsA []string + urlsB []string + resolver func(ctx context.Context, addr string) (*net.TCPAddr, error) + }{ + {[]string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"}, resolveTCPAddrDefault}, + {[]string{ + "http://host1:8080", + "http://host2:8080", + }, []string{ + "http://host1:8080", + "http://host2:8080", + }, errOnResolve}, + } + for idx, c := range cases { + t.Logf("TestURLStringsEqual, case #%d", idx) + resolveTCPAddr = c.resolver + result, err := URLStringsEqual(context.TODO(), zap.NewExample(), c.urlsA, c.urlsB) + if !result { + t.Errorf("unexpected result %v", result) + } + if err != nil { + t.Errorf("unexpected error %v", err) + } } } From daf7e0350a77ae2866ef193c8c64c4328d281d02 Mon Sep 17 00:00:00 2001 From: Sergey Kacheev Date: Thu, 22 Jul 2021 23:15:20 +0700 Subject: [PATCH 02/43] etcdserver: remove code duplication from the peer.send func During the refactoring process, duplicate logging of the send buffer overflow event was added. Each of these log lines logs exactly the same information, the logging context is sufficient to distinguish the cause. Additionally, the unnecessary context (in parentheses) in the log message was removed, which was necessary without the zap context (with the old logger), but now only confuses. 
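Illustrative sketch (not part of this patch): the change amounts to a single
structured Warn call whose remote-peer-active field carries the state that the
removed if/else duplicated. A minimal standalone example, assuming the
go.uber.org/zap dependency is available:

    package main

    import "go.uber.org/zap"

    func main() {
    	lg := zap.NewExample()
    	active := false // stands in for p.status.isActive()
    	// A single Warn call; the boolean field distinguishes the
    	// active-but-overloaded case from the inactive-peer case.
    	lg.Warn(
    		"dropped internal Raft message since sending buffer is full",
    		zap.Bool("remote-peer-active", active),
    	)
    }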
--- server/etcdserver/api/rafthttp/peer.go | 34 ++++++++------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/server/etcdserver/api/rafthttp/peer.go b/server/etcdserver/api/rafthttp/peer.go index 444d6bde9..c2f79e08a 100644 --- a/server/etcdserver/api/rafthttp/peer.go +++ b/server/etcdserver/api/rafthttp/peer.go @@ -250,30 +250,16 @@ func (p *peer) send(m raftpb.Message) { if isMsgSnap(m) { p.r.ReportSnapshot(m.To, raft.SnapshotFailure) } - if p.status.isActive() { - if p.lg != nil { - p.lg.Warn( - "dropped internal Raft message since sending buffer is full (overloaded network)", - zap.String("message-type", m.Type.String()), - zap.String("local-member-id", p.localID.String()), - zap.String("from", types.ID(m.From).String()), - zap.String("remote-peer-id", p.id.String()), - zap.String("remote-peer-name", name), - zap.Bool("remote-peer-active", p.status.isActive()), - ) - } - } else { - if p.lg != nil { - p.lg.Warn( - "dropped internal Raft message since sending buffer is full (overloaded network)", - zap.String("message-type", m.Type.String()), - zap.String("local-member-id", p.localID.String()), - zap.String("from", types.ID(m.From).String()), - zap.String("remote-peer-id", p.id.String()), - zap.String("remote-peer-name", name), - zap.Bool("remote-peer-active", p.status.isActive()), - ) - } + if p.lg != nil { + p.lg.Warn( + "dropped internal Raft message since sending buffer is full", + zap.String("message-type", m.Type.String()), + zap.String("local-member-id", p.localID.String()), + zap.String("from", types.ID(m.From).String()), + zap.String("remote-peer-id", p.id.String()), + zap.String("remote-peer-name", name), + zap.Bool("remote-peer-active", p.status.isActive()), + ) } sentFailures.WithLabelValues(types.ID(m.To).String()).Inc() } From 99182f5404c0cc79c9ec2a09d318fedfbf2274fd Mon Sep 17 00:00:00 2001 From: nickhill Date: Fri, 30 Jul 2021 14:59:20 -0700 Subject: [PATCH 03/43] etcdserver,clientv3: server-side ignore sort-ascend-key for range requests A client-side optimization was made in #6100 to filter ascending key sorts to avoid an unnecessary re-sort since this is the order already returned by the back-end logic. It seems to me that this really belongs on the server side since it's tied to the server implementation and should apply for any caller of the kv api (for example non-go clients). Related, the client/v3 syncer depends on this default sorting which isn't explicit in the kv api contract. So I'm proposing the required sort parameters be included explicitly; it will take the fast path either way. --- client/v3/mirror/syncer.go | 3 ++- server/etcdserver/apply.go | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/client/v3/mirror/syncer.go b/client/v3/mirror/syncer.go index c25ee9b0c..980bab5de 100644 --- a/client/v3/mirror/syncer.go +++ b/client/v3/mirror/syncer.go @@ -68,7 +68,8 @@ func (s *syncer) SyncBase(ctx context.Context) (<-chan clientv3.GetResponse, cha var key string - opts := []clientv3.OpOption{clientv3.WithLimit(batchLimit), clientv3.WithRev(s.rev)} + opts := []clientv3.OpOption{clientv3.WithLimit(batchLimit), clientv3.WithRev(s.rev), + clientv3.WithSort(clientv3.SortByKey, clientv3.SortAscend)} if len(s.prefix) == 0 { // If len(s.prefix) == 0, we will sync the entire key-value space. 
diff --git a/server/etcdserver/apply.go b/server/etcdserver/apply.go
index 5235d61cd..114172ac8 100644
--- a/server/etcdserver/apply.go
+++ b/server/etcdserver/apply.go
@@ -386,6 +386,11 @@ func (a *applierV3backend) Range(ctx context.Context, txn mvcc.TxnRead, r *pb.Ra
 		// sorted by keys in lexiographically ascending order,
 		// sort ASCEND by default only when target is not 'KEY'
 		sortOrder = pb.RangeRequest_ASCEND
+	} else if r.SortTarget == pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_ASCEND {
+		// Since current mvcc.Range implementation returns results
+		// sorted by keys in lexiographically ascending order,
+		// don't re-sort when target is 'KEY' and order is ASCEND
+		sortOrder = pb.RangeRequest_NONE
 	}
 	if sortOrder != pb.RangeRequest_NONE {
 		var sorter sort.Interface

From eac75c28ae2aad5df898058d1d7bb0ceb1944e3d Mon Sep 17 00:00:00 2001
From: Sergey Kacheev
Date: Sun, 1 Aug 2021 00:20:50 +0700
Subject: [PATCH 04/43] Apply suggestions from code review

Co-authored-by: Lili Cosic
---
 pkg/netutil/netutil.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/netutil/netutil.go b/pkg/netutil/netutil.go
index cda090034..5b1e17966 100644
--- a/pkg/netutil/netutil.go
+++ b/pkg/netutil/netutil.go
@@ -192,7 +192,7 @@ func URLStringsEqual(ctx context.Context, lg *zap.Logger, a []string, b []string
 	sort.Sort(types.URLs(urlsB))
 	for i := range urlsA {
 		if !reflect.DeepEqual(urlsA[i], urlsB[i]) {
-			// If urls are not equal, try to resolve it and compare again.
+			// If URLs are not equal, try to resolve it and compare again.
 			return urlsEqual(ctx, lg, urlsA, urlsB)
 		}
 	}
@@ -212,7 +212,7 @@ func stringsToURLs(us []string) ([]url.URL, error) {
 	for _, str := range us {
 		u, err := url.Parse(str)
 		if err != nil {
-			return nil, fmt.Errorf("failed to parse %q", str)
+			return nil, fmt.Errorf("failed to parse string to URL: %q", str)
 		}
 		urls = append(urls, *u)
 	}

From 65686efa4a3c59adb18e3e622d11fe60c4e211e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arda=20G=C3=BC=C3=A7l=C3=BC?=
Date: Mon, 13 Sep 2021 12:28:56 +0300
Subject: [PATCH 05/43] Decouple prefixArgs from os.Env dependency

prefixArgs uses os.Setenv in e2e tests instead of envMap. This creates
overwrites in some test cases and has an impact on test quality and
isolation between tests. This PR uses the ctlCtx envMap in each test
with high priority and merges OS environment variables with low
priority.
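A rough sketch of the intended precedence (illustrative only; the helper below
is hypothetical and not the code added by this patch): variables passed per
test win over variables inherited from the test process.

    package main

    import (
    	"fmt"
    	"os"
    	"strings"
    )

    // mergeEnv is a hypothetical helper: explicitly passed per-test variables
    // take precedence over variables inherited from the process environment.
    func mergeEnv(explicit map[string]string) []string {
    	merged := make([]string, 0, len(explicit))
    	for k, v := range explicit {
    		merged = append(merged, fmt.Sprintf("%s=%s", k, v))
    	}
    	for _, kv := range os.Environ() {
    		name := strings.SplitN(kv, "=", 2)[0]
    		if _, overridden := explicit[name]; !overridden {
    			merged = append(merged, kv)
    		}
    	}
    	return merged
    }

    func main() {
    	fmt.Println(mergeEnv(map[string]string{"ETCDCTL_API": "3"}))
    }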
--- tests/e2e/cluster_proxy_test.go | 2 +- tests/e2e/ctl_v2_test.go | 2 +- tests/e2e/ctl_v3_alarm_test.go | 2 +- tests/e2e/ctl_v3_auth_test.go | 38 +++++++++++++-------------- tests/e2e/ctl_v3_compact_test.go | 2 +- tests/e2e/ctl_v3_defrag_test.go | 4 +-- tests/e2e/ctl_v3_elect_test.go | 2 +- tests/e2e/ctl_v3_endpoint_test.go | 6 ++--- tests/e2e/ctl_v3_kv_test.go | 24 ++++++++--------- tests/e2e/ctl_v3_lease_test.go | 14 +++++----- tests/e2e/ctl_v3_lock_test.go | 4 +-- tests/e2e/ctl_v3_make_mirror_test.go | 2 +- tests/e2e/ctl_v3_member_test.go | 12 ++++----- tests/e2e/ctl_v3_move_leader_test.go | 13 ++++----- tests/e2e/ctl_v3_role_test.go | 8 +++--- tests/e2e/ctl_v3_snapshot_test.go | 13 +++++---- tests/e2e/ctl_v3_test.go | 12 ++++----- tests/e2e/ctl_v3_txn_test.go | 2 +- tests/e2e/ctl_v3_user_test.go | 2 +- tests/e2e/ctl_v3_watch_test.go | 4 +-- tests/e2e/etcd_config_test.go | 14 +++++----- tests/e2e/etcd_corrupt_test.go | 2 +- tests/e2e/etcd_process.go | 2 +- tests/e2e/etcd_spawn_nocov.go | 36 ++++++++++++++++++++----- tests/e2e/util.go | 14 ++++++---- tests/e2e/v2store_deprecation_test.go | 4 +-- tests/e2e/v3_curl_test.go | 4 +-- 27 files changed, 137 insertions(+), 107 deletions(-) diff --git a/tests/e2e/cluster_proxy_test.go b/tests/e2e/cluster_proxy_test.go index b96a10037..f11db67ac 100644 --- a/tests/e2e/cluster_proxy_test.go +++ b/tests/e2e/cluster_proxy_test.go @@ -132,7 +132,7 @@ func (pp *proxyProc) start() error { if pp.proc != nil { panic("already started") } - proc, err := spawnCmdWithLogger(pp.lg, append([]string{pp.execPath}, pp.args...)) + proc, err := spawnCmdWithLogger(pp.lg, append([]string{pp.execPath}, pp.args...), nil) if err != nil { return err } diff --git a/tests/e2e/ctl_v2_test.go b/tests/e2e/ctl_v2_test.go index 107f1c2f7..0aae87bc8 100644 --- a/tests/e2e/ctl_v2_test.go +++ b/tests/e2e/ctl_v2_test.go @@ -505,7 +505,7 @@ func etcdctlBackup(t testing.TB, clus *etcdProcessCluster, dataDir, backupDir st cmdArgs = append(cmdArgs, "--with-v3=false") } t.Logf("Running: %v", cmdArgs) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return err } diff --git a/tests/e2e/ctl_v3_alarm_test.go b/tests/e2e/ctl_v3_alarm_test.go index 7b9b445b0..19852c30b 100644 --- a/tests/e2e/ctl_v3_alarm_test.go +++ b/tests/e2e/ctl_v3_alarm_test.go @@ -101,5 +101,5 @@ func alarmTest(cx ctlCtx) { func ctlV3Alarm(cx ctlCtx, cmd string, as ...string) error { cmdArgs := append(cx.PrefixArgs(), "alarm", cmd) - return spawnWithExpects(cmdArgs, as...) + return spawnWithExpects(cmdArgs, cx.envMap, as...) 
} diff --git a/tests/e2e/ctl_v3_auth_test.go b/tests/e2e/ctl_v3_auth_test.go index 58a3b61e0..11db1b389 100644 --- a/tests/e2e/ctl_v3_auth_test.go +++ b/tests/e2e/ctl_v3_auth_test.go @@ -93,7 +93,7 @@ func authEnable(cx ctlCtx) error { func ctlV3AuthEnable(cx ctlCtx) error { cmdArgs := append(cx.PrefixArgs(), "auth", "enable") - return spawnWithExpect(cmdArgs, "Authentication Enabled") + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, "Authentication Enabled") } func authDisableTest(cx ctlCtx) { @@ -139,12 +139,12 @@ func authDisableTest(cx ctlCtx) { func ctlV3AuthDisable(cx ctlCtx) error { cmdArgs := append(cx.PrefixArgs(), "auth", "disable") - return spawnWithExpect(cmdArgs, "Authentication Disabled") + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, "Authentication Disabled") } func authStatusTest(cx ctlCtx) { cmdArgs := append(cx.PrefixArgs(), "auth", "status") - if err := spawnWithExpects(cmdArgs, "Authentication Status: false", "AuthRevision:"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "Authentication Status: false", "AuthRevision:"); err != nil { cx.t.Fatal(err) } @@ -155,15 +155,15 @@ func authStatusTest(cx ctlCtx) { cx.user, cx.pass = "root", "root" cmdArgs = append(cx.PrefixArgs(), "auth", "status") - if err := spawnWithExpects(cmdArgs, "Authentication Status: true", "AuthRevision:"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "Authentication Status: true", "AuthRevision:"); err != nil { cx.t.Fatal(err) } cmdArgs = append(cx.PrefixArgs(), "auth", "status", "--write-out", "json") - if err := spawnWithExpect(cmdArgs, "enabled"); err != nil { + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, "enabled"); err != nil { cx.t.Fatal(err) } - if err := spawnWithExpect(cmdArgs, "authRevision"); err != nil { + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, "authRevision"); err != nil { cx.t.Fatal(err) } } @@ -381,25 +381,25 @@ func authRoleRevokeDuringOpsTest(cx ctlCtx) { } func ctlV3PutFailAuth(cx ctlCtx, key, val string) error { - return spawnWithExpect(append(cx.PrefixArgs(), "put", key, val), "authentication failed") + return spawnWithExpectWithEnv(append(cx.PrefixArgs(), "put", key, val), cx.envMap, "authentication failed") } func ctlV3PutFailPerm(cx ctlCtx, key, val string) error { - return spawnWithExpect(append(cx.PrefixArgs(), "put", key, val), "permission denied") + return spawnWithExpectWithEnv(append(cx.PrefixArgs(), "put", key, val), cx.envMap, "permission denied") } func authSetupTestUser(cx ctlCtx) { if err := ctlV3User(cx, []string{"add", "test-user", "--interactive=false"}, "User test-user created", []string{"pass"}); err != nil { cx.t.Fatal(err) } - if err := spawnWithExpect(append(cx.PrefixArgs(), "role", "add", "test-role"), "Role test-role created"); err != nil { + if err := spawnWithExpectWithEnv(append(cx.PrefixArgs(), "role", "add", "test-role"), cx.envMap, "Role test-role created"); err != nil { cx.t.Fatal(err) } if err := ctlV3User(cx, []string{"grant-role", "test-user", "test-role"}, "Role test-role is granted to user test-user", nil); err != nil { cx.t.Fatal(err) } cmd := append(cx.PrefixArgs(), "role", "grant-permission", "test-role", "readwrite", "foo") - if err := spawnWithExpect(cmd, "Role test-role updated"); err != nil { + if err := spawnWithExpectWithEnv(cmd, cx.envMap, "Role test-role updated"); err != nil { cx.t.Fatal(err) } } @@ -611,7 +611,7 @@ func authTestCertCN(cx ctlCtx) { if err := ctlV3User(cx, []string{"add", "example.com", "--interactive=false"}, "User example.com created", []string{""}); 
err != nil { cx.t.Fatal(err) } - if err := spawnWithExpect(append(cx.PrefixArgs(), "role", "add", "test-role"), "Role test-role created"); err != nil { + if err := spawnWithExpectWithEnv(append(cx.PrefixArgs(), "role", "add", "test-role"), cx.envMap, "Role test-role created"); err != nil { cx.t.Fatal(err) } if err := ctlV3User(cx, []string{"grant-role", "example.com", "test-role"}, "Role test-role is granted to user example.com", nil); err != nil { @@ -921,13 +921,13 @@ func authTestRoleGet(cx ctlCtx) { "KV Read:", "foo", "KV Write:", "foo", } - if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "test-role"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "test-role"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } // test-user can get the information of test-role because it belongs to the role cx.user, cx.pass = "test-user", "pass" - if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "test-role"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "test-role"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } @@ -935,7 +935,7 @@ func authTestRoleGet(cx ctlCtx) { expected = []string{ "Error: etcdserver: permission denied", } - if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "root"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "role", "get", "root"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } } @@ -952,13 +952,13 @@ func authTestUserGet(cx ctlCtx) { "Roles: test-role", } - if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "test-user"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "test-user"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } // test-user can get the information of test-user itself cx.user, cx.pass = "test-user", "pass" - if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "test-user"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "test-user"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } @@ -966,7 +966,7 @@ func authTestUserGet(cx ctlCtx) { expected = []string{ "Error: etcdserver: permission denied", } - if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "root"), expected...); err != nil { + if err := spawnWithExpects(append(cx.PrefixArgs(), "user", "get", "root"), cx.envMap, expected...); err != nil { cx.t.Fatal(err) } } @@ -977,7 +977,7 @@ func authTestRoleList(cx ctlCtx) { } cx.user, cx.pass = "root", "root" authSetupTestUser(cx) - if err := spawnWithExpect(append(cx.PrefixArgs(), "role", "list"), "test-role"); err != nil { + if err := spawnWithExpectWithEnv(append(cx.PrefixArgs(), "role", "list"), cx.envMap, "test-role"); err != nil { cx.t.Fatal(err) } } @@ -1088,7 +1088,7 @@ func certCNAndUsername(cx ctlCtx, noPassword bool) { cx.t.Fatal(err) } } - if err := spawnWithExpect(append(cx.PrefixArgs(), "role", "add", "test-role-cn"), "Role test-role-cn created"); err != nil { + if err := spawnWithExpectWithEnv(append(cx.PrefixArgs(), "role", "add", "test-role-cn"), cx.envMap, "Role test-role-cn created"); err != nil { cx.t.Fatal(err) } if err := ctlV3User(cx, []string{"grant-role", "example.com", "test-role-cn"}, "Role test-role-cn is granted to user example.com", nil); err != nil { diff --git a/tests/e2e/ctl_v3_compact_test.go b/tests/e2e/ctl_v3_compact_test.go index 5b0c51eb4..4852382c8 100644 --- 
a/tests/e2e/ctl_v3_compact_test.go +++ b/tests/e2e/ctl_v3_compact_test.go @@ -71,5 +71,5 @@ func ctlV3Compact(cx ctlCtx, rev int64, physical bool) error { if physical { cmdArgs = append(cmdArgs, "--physical") } - return spawnWithExpect(cmdArgs, "compacted revision "+rs) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, "compacted revision "+rs) } diff --git a/tests/e2e/ctl_v3_defrag_test.go b/tests/e2e/ctl_v3_defrag_test.go index 8fbe476f0..f1a63094f 100644 --- a/tests/e2e/ctl_v3_defrag_test.go +++ b/tests/e2e/ctl_v3_defrag_test.go @@ -52,13 +52,13 @@ func ctlV3OnlineDefrag(cx ctlCtx) error { for i := range lines { lines[i] = "Finished defragmenting etcd member" } - return spawnWithExpects(cmdArgs, lines...) + return spawnWithExpects(cmdArgs, cx.envMap, lines...) } func ctlV3OfflineDefrag(cx ctlCtx) error { cmdArgs := append(cx.PrefixArgsUtl(), "defrag", "--data-dir", cx.dataDir) lines := []string{"finished defragmenting directory"} - return spawnWithExpects(cmdArgs, lines...) + return spawnWithExpects(cmdArgs, cx.envMap, lines...) } func defragOfflineTest(cx ctlCtx) { diff --git a/tests/e2e/ctl_v3_elect_test.go b/tests/e2e/ctl_v3_elect_test.go index 332ce9708..9b7891560 100644 --- a/tests/e2e/ctl_v3_elect_test.go +++ b/tests/e2e/ctl_v3_elect_test.go @@ -98,7 +98,7 @@ func testElect(cx ctlCtx) { // ctlV3Elect creates a elect process with a channel listening for when it wins the election. func ctlV3Elect(cx ctlCtx, name, proposal string) (*expect.ExpectProcess, <-chan string, error) { cmdArgs := append(cx.PrefixArgs(), "elect", name, proposal) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) outc := make(chan string, 1) if err != nil { close(outc) diff --git a/tests/e2e/ctl_v3_endpoint_test.go b/tests/e2e/ctl_v3_endpoint_test.go index 3c4e7d72b..33dd7f5c6 100644 --- a/tests/e2e/ctl_v3_endpoint_test.go +++ b/tests/e2e/ctl_v3_endpoint_test.go @@ -40,7 +40,7 @@ func ctlV3EndpointHealth(cx ctlCtx) error { for i := range lines { lines[i] = "is healthy" } - return spawnWithExpects(cmdArgs, lines...) + return spawnWithExpects(cmdArgs, cx.envMap, lines...) } func endpointStatusTest(cx ctlCtx) { @@ -56,7 +56,7 @@ func ctlV3EndpointStatus(cx ctlCtx) error { u, _ := url.Parse(ep) eps = append(eps, u.Host) } - return spawnWithExpects(cmdArgs, eps...) + return spawnWithExpects(cmdArgs, cx.envMap, eps...) } func endpointHashKVTest(cx ctlCtx) { @@ -88,5 +88,5 @@ func ctlV3EndpointHashKV(cx ctlCtx) error { u, _ := url.Parse(ep) ss = append(ss, fmt.Sprintf("%s, %d", u.Host, hresp.Hash)) } - return spawnWithExpects(cmdArgs, ss...) + return spawnWithExpects(cmdArgs, cx.envMap, ss...) } diff --git a/tests/e2e/ctl_v3_kv_test.go b/tests/e2e/ctl_v3_kv_test.go index 1952ddd22..17b156a0d 100644 --- a/tests/e2e/ctl_v3_kv_test.go +++ b/tests/e2e/ctl_v3_kv_test.go @@ -190,7 +190,7 @@ func getFormatTest(cx ctlCtx) { cmdArgs = append(cmdArgs, "--print-value-only") } cmdArgs = append(cmdArgs, "abc") - if err := spawnWithExpect(cmdArgs, tt.wstr); err != nil { + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, tt.wstr); err != nil { cx.t.Errorf("#%d: error (%v), wanted %v", i, err, tt.wstr) } } @@ -228,24 +228,24 @@ func getKeysOnlyTest(cx ctlCtx) { cx.t.Fatal(err) } cmdArgs := append(cx.PrefixArgs(), []string{"get", "--keys-only", "key"}...) 
- if err := spawnWithExpect(cmdArgs, "key"); err != nil { + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, "key"); err != nil { cx.t.Fatal(err) } - if err := spawnWithExpects(cmdArgs, "val"); err == nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "val"); err == nil { cx.t.Fatalf("got value but passed --keys-only") } } func getCountOnlyTest(cx ctlCtx) { cmdArgs := append(cx.PrefixArgs(), []string{"get", "--count-only", "key", "--prefix", "--write-out=fields"}...) - if err := spawnWithExpects(cmdArgs, "\"Count\" : 0"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "\"Count\" : 0"); err != nil { cx.t.Fatal(err) } if err := ctlV3Put(cx, "key", "val", ""); err != nil { cx.t.Fatal(err) } cmdArgs = append(cx.PrefixArgs(), []string{"get", "--count-only", "key", "--prefix", "--write-out=fields"}...) - if err := spawnWithExpects(cmdArgs, "\"Count\" : 1"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "\"Count\" : 1"); err != nil { cx.t.Fatal(err) } if err := ctlV3Put(cx, "key1", "val", ""); err != nil { @@ -255,21 +255,21 @@ func getCountOnlyTest(cx ctlCtx) { cx.t.Fatal(err) } cmdArgs = append(cx.PrefixArgs(), []string{"get", "--count-only", "key", "--prefix", "--write-out=fields"}...) - if err := spawnWithExpects(cmdArgs, "\"Count\" : 2"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "\"Count\" : 2"); err != nil { cx.t.Fatal(err) } if err := ctlV3Put(cx, "key2", "val", ""); err != nil { cx.t.Fatal(err) } cmdArgs = append(cx.PrefixArgs(), []string{"get", "--count-only", "key", "--prefix", "--write-out=fields"}...) - if err := spawnWithExpects(cmdArgs, "\"Count\" : 3"); err != nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, "\"Count\" : 3"); err != nil { cx.t.Fatal(err) } expected := []string{ "\"Count\" : 3", } cmdArgs = append(cx.PrefixArgs(), []string{"get", "--count-only", "key3", "--prefix", "--write-out=fields"}...) - if err := spawnWithExpects(cmdArgs, expected...); err == nil { + if err := spawnWithExpects(cmdArgs, cx.envMap, expected...); err == nil { cx.t.Fatal(err) } } @@ -348,7 +348,7 @@ func ctlV3Put(cx ctlCtx, key, value, leaseID string, flags ...string) error { if len(flags) != 0 { cmdArgs = append(cmdArgs, flags...) } - return spawnWithExpect(cmdArgs, "OK") + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, "OK") } type kv struct { @@ -365,7 +365,7 @@ func ctlV3Get(cx ctlCtx, args []string, kvs ...kv) error { for _, elem := range kvs { lines = append(lines, elem.key, elem.val) } - return spawnWithExpects(cmdArgs, lines...) + return spawnWithExpects(cmdArgs, cx.envMap, lines...) } // ctlV3GetWithErr runs "get" command expecting no output but error @@ -375,11 +375,11 @@ func ctlV3GetWithErr(cx ctlCtx, args []string, errs []string) error { if !cx.quorum { cmdArgs = append(cmdArgs, "--consistency", "s") } - return spawnWithExpects(cmdArgs, errs...) + return spawnWithExpects(cmdArgs, cx.envMap, errs...) } func ctlV3Del(cx ctlCtx, args []string, num int) error { cmdArgs := append(cx.PrefixArgs(), "del") cmdArgs = append(cmdArgs, args...) 
- return spawnWithExpects(cmdArgs, fmt.Sprintf("%d", num)) + return spawnWithExpects(cmdArgs, cx.envMap, fmt.Sprintf("%d", num)) } diff --git a/tests/e2e/ctl_v3_lease_test.go b/tests/e2e/ctl_v3_lease_test.go index 59e47bc1f..0dc445202 100644 --- a/tests/e2e/ctl_v3_lease_test.go +++ b/tests/e2e/ctl_v3_lease_test.go @@ -113,7 +113,7 @@ func leaseTestGrantTimeToLive(cx ctlCtx) { } cmdArgs := append(cx.PrefixArgs(), "lease", "timetolive", id, "--keys") - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { cx.t.Fatalf("leaseTestGrantTimeToLive: error (%v)", err) } @@ -146,7 +146,7 @@ func leaseTestGrantLeasesList(cx ctlCtx) error { } cmdArgs := append(cx.PrefixArgs(), "lease", "list") - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return fmt.Errorf("lease list failed (%v)", err) } @@ -177,7 +177,7 @@ func leaseTestTimeToLiveExpire(cx ctlCtx, ttl int) error { time.Sleep(time.Duration(ttl+1) * time.Second) cmdArgs := append(cx.PrefixArgs(), "lease", "timetolive", leaseID) exp := fmt.Sprintf("lease %s already expired", leaseID) - if err = spawnWithExpect(cmdArgs, exp); err != nil { + if err = spawnWithExpectWithEnv(cmdArgs, cx.envMap, exp); err != nil { return fmt.Errorf("lease not properly expired: (%v)", err) } if err := ctlV3Get(cx, []string{"key"}); err != nil { @@ -247,7 +247,7 @@ func leaseTestRevoke(cx ctlCtx) error { func ctlV3LeaseGrant(cx ctlCtx, ttl int) (string, error) { cmdArgs := append(cx.PrefixArgs(), "lease", "grant", strconv.Itoa(ttl)) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return "", err } @@ -271,7 +271,7 @@ func ctlV3LeaseGrant(cx ctlCtx, ttl int) (string, error) { func ctlV3LeaseKeepAlive(cx ctlCtx, leaseID string) error { cmdArgs := append(cx.PrefixArgs(), "lease", "keep-alive", leaseID) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return err } @@ -285,7 +285,7 @@ func ctlV3LeaseKeepAlive(cx ctlCtx, leaseID string) error { func ctlV3LeaseKeepAliveOnce(cx ctlCtx, leaseID string) error { cmdArgs := append(cx.PrefixArgs(), "lease", "keep-alive", "--once", leaseID) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return err } @@ -298,5 +298,5 @@ func ctlV3LeaseKeepAliveOnce(cx ctlCtx, leaseID string) error { func ctlV3LeaseRevoke(cx ctlCtx, leaseID string) error { cmdArgs := append(cx.PrefixArgs(), "lease", "revoke", leaseID) - return spawnWithExpect(cmdArgs, fmt.Sprintf("lease %s revoked", leaseID)) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, fmt.Sprintf("lease %s revoked", leaseID)) } diff --git a/tests/e2e/ctl_v3_lock_test.go b/tests/e2e/ctl_v3_lock_test.go index e88000a15..5330afb0a 100644 --- a/tests/e2e/ctl_v3_lock_test.go +++ b/tests/e2e/ctl_v3_lock_test.go @@ -119,7 +119,7 @@ func testLockWithCmd(cx ctlCtx) { // ctlV3Lock creates a lock process with a channel listening for when it acquires the lock. func ctlV3Lock(cx ctlCtx, name string) (*expect.ExpectProcess, <-chan string, error) { cmdArgs := append(cx.PrefixArgs(), "lock", name) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) outc := make(chan string, 1) if err != nil { close(outc) @@ -140,5 +140,5 @@ func ctlV3LockWithCmd(cx ctlCtx, execCmd []string, as ...string) error { // use command as lock name cmdArgs := append(cx.PrefixArgs(), "lock", execCmd[0]) cmdArgs = append(cmdArgs, execCmd...) - return spawnWithExpects(cmdArgs, as...) 
+ return spawnWithExpects(cmdArgs, cx.envMap, as...) } diff --git a/tests/e2e/ctl_v3_make_mirror_test.go b/tests/e2e/ctl_v3_make_mirror_test.go index f15340670..491af15bd 100644 --- a/tests/e2e/ctl_v3_make_mirror_test.go +++ b/tests/e2e/ctl_v3_make_mirror_test.go @@ -83,7 +83,7 @@ func testMirrorCommand(cx ctlCtx, flags []string, sourcekvs []kv, destkvs []kvEx cmdArgs := append(cx.PrefixArgs(), "make-mirror") cmdArgs = append(cmdArgs, flags...) cmdArgs = append(cmdArgs, fmt.Sprintf("localhost:%d", mirrorcfg.basePort)) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { cx.t.Fatal(err) } diff --git a/tests/e2e/ctl_v3_member_test.go b/tests/e2e/ctl_v3_member_test.go index 76198569c..f92526ed6 100644 --- a/tests/e2e/ctl_v3_member_test.go +++ b/tests/e2e/ctl_v3_member_test.go @@ -95,13 +95,13 @@ func ctlV3MemberList(cx ctlCtx) error { for i := range lines { lines[i] = "started" } - return spawnWithExpects(cmdArgs, lines...) + return spawnWithExpects(cmdArgs, cx.envMap, lines...) } func getMemberList(cx ctlCtx) (etcdserverpb.MemberListResponse, error) { cmdArgs := append(cx.PrefixArgs(), "--write-out", "json", "member", "list") - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return etcdserverpb.MemberListResponse{}, err } @@ -130,7 +130,7 @@ func memberListWithHexTest(cx ctlCtx) { cmdArgs := append(cx.PrefixArgs(), "--write-out", "json", "--hex", "member", "list") - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { cx.t.Fatalf("memberListWithHexTest error (%v)", err) } @@ -177,7 +177,7 @@ func memberRemoveTest(cx ctlCtx) { func ctlV3MemberRemove(cx ctlCtx, ep, memberID, clusterID string) error { cmdArgs := append(cx.prefixArgs([]string{ep}), "member", "remove", memberID) - return spawnWithExpect(cmdArgs, fmt.Sprintf("%s removed from cluster %s", memberID, clusterID)) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, fmt.Sprintf("%s removed from cluster %s", memberID, clusterID)) } func memberAddTest(cx ctlCtx) { @@ -197,7 +197,7 @@ func ctlV3MemberAdd(cx ctlCtx, peerURL string, isLearner bool) error { if isLearner { cmdArgs = append(cmdArgs, "--learner") } - return spawnWithExpect(cmdArgs, " added to cluster ") + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, " added to cluster ") } func memberUpdateTest(cx ctlCtx) { @@ -215,5 +215,5 @@ func memberUpdateTest(cx ctlCtx) { func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error { cmdArgs := append(cx.PrefixArgs(), "member", "update", memberID, fmt.Sprintf("--peer-urls=%s", peerURL)) - return spawnWithExpect(cmdArgs, " updated in cluster ") + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, " updated in cluster ") } diff --git a/tests/e2e/ctl_v3_move_leader_test.go b/tests/e2e/ctl_v3_move_leader_test.go index 08abc37e2..05dc49939 100644 --- a/tests/e2e/ctl_v3_move_leader_test.go +++ b/tests/e2e/ctl_v3_move_leader_test.go @@ -97,21 +97,22 @@ func testCtlV3MoveLeader(t *testing.T, cfg etcdProcessClusterConfig) { } tests := []struct { - prefixes []string - expect string + eps []string + expect string }{ { // request to non-leader - cx.prefixArgs([]string{cx.epc.EndpointsV3()[(leadIdx+1)%3]}), + []string{cx.epc.EndpointsV3()[(leadIdx+1)%3]}, "no leader endpoint given at ", }, { // request to leader - cx.prefixArgs([]string{cx.epc.EndpointsV3()[leadIdx]}), + []string{cx.epc.EndpointsV3()[leadIdx]}, fmt.Sprintf("Leadership transferred from %s to %s", types.ID(leaderID), types.ID(transferee)), }, } for i, tc := 
range tests { - cmdArgs := append(tc.prefixes, "move-leader", types.ID(transferee).String()) - if err := spawnWithExpect(cmdArgs, tc.expect); err != nil { + prefix := cx.prefixArgs(tc.eps) + cmdArgs := append(prefix, "move-leader", types.ID(transferee).String()) + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, tc.expect); err != nil { t.Fatalf("#%d: %v", i, err) } } diff --git a/tests/e2e/ctl_v3_role_test.go b/tests/e2e/ctl_v3_role_test.go index 820b64783..2ca2152f3 100644 --- a/tests/e2e/ctl_v3_role_test.go +++ b/tests/e2e/ctl_v3_role_test.go @@ -140,13 +140,13 @@ func ctlV3RoleMultiExpect(cx ctlCtx, args []string, expStr ...string) error { cmdArgs := append(cx.PrefixArgs(), "role") cmdArgs = append(cmdArgs, args...) - return spawnWithExpects(cmdArgs, expStr...) + return spawnWithExpects(cmdArgs, cx.envMap, expStr...) } func ctlV3Role(cx ctlCtx, args []string, expStr string) error { cmdArgs := append(cx.PrefixArgs(), "role") cmdArgs = append(cmdArgs, args...) - return spawnWithExpect(cmdArgs, expStr) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, expStr) } func ctlV3RoleGrantPermission(cx ctlCtx, rolename string, perm grantingPerm) error { @@ -160,7 +160,7 @@ func ctlV3RoleGrantPermission(cx ctlCtx, rolename string, perm grantingPerm) err cmdArgs = append(cmdArgs, rolename) cmdArgs = append(cmdArgs, grantingPermToArgs(perm)...) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return err } @@ -186,7 +186,7 @@ func ctlV3RoleRevokePermission(cx ctlCtx, rolename string, key, rangeEnd string, expStr = fmt.Sprintf("Permission of key %s is revoked from role %s", key, rolename) } - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return err } diff --git a/tests/e2e/ctl_v3_snapshot_test.go b/tests/e2e/ctl_v3_snapshot_test.go index 92c0268b1..1ee6eb1eb 100644 --- a/tests/e2e/ctl_v3_snapshot_test.go +++ b/tests/e2e/ctl_v3_snapshot_test.go @@ -84,10 +84,11 @@ func snapshotCorruptTest(cx ctlCtx) { datadir := cx.t.TempDir() - serr := spawnWithExpect( + serr := spawnWithExpectWithEnv( append(cx.PrefixArgsUtl(), "snapshot", "restore", "--data-dir", datadir, fpath), + cx.envMap, "expected sha256") if serr != nil { @@ -117,10 +118,11 @@ func snapshotStatusBeforeRestoreTest(cx ctlCtx) { dataDir := cx.t.TempDir() defer os.RemoveAll(dataDir) - serr := spawnWithExpect( + serr := spawnWithExpectWithEnv( append(cx.PrefixArgsUtl(), "snapshot", "restore", "--data-dir", dataDir, fpath), + cx.envMap, "added member") if serr != nil { cx.t.Fatal(serr) @@ -129,13 +131,13 @@ func snapshotStatusBeforeRestoreTest(cx ctlCtx) { func ctlV3SnapshotSave(cx ctlCtx, fpath string) error { cmdArgs := append(cx.PrefixArgs(), "snapshot", "save", fpath) - return spawnWithExpect(cmdArgs, fmt.Sprintf("Snapshot saved at %s", fpath)) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, fmt.Sprintf("Snapshot saved at %s", fpath)) } func getSnapshotStatus(cx ctlCtx, fpath string) (snapshot.Status, error) { cmdArgs := append(cx.PrefixArgsUtl(), "--write-out", "json", "snapshot", "status", fpath) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return snapshot.Status{}, err } @@ -203,6 +205,7 @@ func testIssue6361(t *testing.T, etcdutl bool) { t.Log("etcdctl saving snapshot...") if err = spawnWithExpects(append(prefixArgs, "snapshot", "save", fpath), + nil, fmt.Sprintf("Snapshot saved at %s", fpath), ); err != nil { t.Fatal(err) @@ -264,7 +267,7 @@ func testIssue6361(t *testing.T, etcdutl bool) { nepc, err = 
spawnCmd([]string{epc.procs[0].Config().execPath, "--name", name2, "--listen-client-urls", clientURL, "--advertise-client-urls", clientURL, "--listen-peer-urls", peerURL, "--initial-advertise-peer-urls", peerURL, - "--initial-cluster", initialCluster2, "--initial-cluster-state", "existing", "--data-dir", newDataDir2}) + "--initial-cluster", initialCluster2, "--initial-cluster-state", "existing", "--data-dir", newDataDir2}, nil) if err != nil { t.Fatal(err) } diff --git a/tests/e2e/ctl_v3_test.go b/tests/e2e/ctl_v3_test.go index 5c8bb2fe9..320b81e64 100644 --- a/tests/e2e/ctl_v3_test.go +++ b/tests/e2e/ctl_v3_test.go @@ -104,7 +104,7 @@ func clusterVersionTest(cx ctlCtx, expected string) { func ctlV3Version(cx ctlCtx) error { cmdArgs := append(cx.PrefixArgs(), "version") - return spawnWithExpect(cmdArgs, version.Version) + return spawnWithExpectWithEnv(cmdArgs, cx.envMap, version.Version) } // TestCtlV3DialWithHTTPScheme ensures that client handles endpoints with HTTPS scheme. @@ -114,7 +114,7 @@ func TestCtlV3DialWithHTTPScheme(t *testing.T) { func dialWithSchemeTest(cx ctlCtx) { cmdArgs := append(cx.prefixArgs(cx.epc.EndpointsV3()), "put", "foo", "bar") - if err := spawnWithExpect(cmdArgs, "OK"); err != nil { + if err := spawnWithExpectWithEnv(cmdArgs, cx.envMap, "OK"); err != nil { cx.t.Fatal(err) } } @@ -129,7 +129,7 @@ type ctlCtx struct { epc *etcdProcessCluster - envMap map[string]struct{} + envMap map[string]string dialTimeout time.Duration @@ -201,7 +201,7 @@ func withApiPrefix(p string) ctlOption { } func withFlagByEnv() ctlOption { - return func(cx *ctlCtx) { cx.envMap = make(map[string]struct{}) } + return func(cx *ctlCtx) { cx.envMap = make(map[string]string) } } func withEtcdutl() ctlOption { @@ -248,6 +248,7 @@ func testCtlWithOffline(t *testing.T, testFunc func(ctlCtx), testOfflineFunc fun for k := range ret.envMap { os.Unsetenv(k) } + ret.envMap = make(map[string]string) } if ret.epc != nil { if errC := ret.epc.Close(); errC != nil { @@ -311,8 +312,7 @@ func (cx *ctlCtx) prefixArgs(eps []string) []string { for k, v := range fmap { if useEnv { ek := flags.FlagToEnv("ETCDCTL", k) - os.Setenv(ek, v) - cx.envMap[ek] = struct{}{} + cx.envMap[ek] = v } else { cmdArgs = append(cmdArgs, fmt.Sprintf("--%s=%s", k, v)) } diff --git a/tests/e2e/ctl_v3_txn_test.go b/tests/e2e/ctl_v3_txn_test.go index bbcec5db6..6fd4ed16b 100644 --- a/tests/e2e/ctl_v3_txn_test.go +++ b/tests/e2e/ctl_v3_txn_test.go @@ -102,7 +102,7 @@ func ctlV3Txn(cx ctlCtx, rqs txnRequests) error { if cx.interactive { cmdArgs = append(cmdArgs, "--interactive") } - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return err } diff --git a/tests/e2e/ctl_v3_user_test.go b/tests/e2e/ctl_v3_user_test.go index 8672ae7b6..d4e409a17 100644 --- a/tests/e2e/ctl_v3_user_test.go +++ b/tests/e2e/ctl_v3_user_test.go @@ -179,7 +179,7 @@ func ctlV3User(cx ctlCtx, args []string, expStr string, stdIn []string) error { cmdArgs := append(cx.PrefixArgs(), "user") cmdArgs = append(cmdArgs, args...) 
- proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, cx.envMap) if err != nil { return err } diff --git a/tests/e2e/ctl_v3_watch_test.go b/tests/e2e/ctl_v3_watch_test.go index fe25da1b7..0e0f24e94 100644 --- a/tests/e2e/ctl_v3_watch_test.go +++ b/tests/e2e/ctl_v3_watch_test.go @@ -35,7 +35,7 @@ func setupWatchArgs(cx ctlCtx, args []string) []string { func ctlV3Watch(cx ctlCtx, args []string, kvs ...kvExec) error { cmdArgs := setupWatchArgs(cx, args) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return err } @@ -66,7 +66,7 @@ func ctlV3Watch(cx ctlCtx, args []string, kvs ...kvExec) error { func ctlV3WatchFailPerm(cx ctlCtx, args []string) error { cmdArgs := setupWatchArgs(cx, args) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, nil) if err != nil { return err } diff --git a/tests/e2e/etcd_config_test.go b/tests/e2e/etcd_config_test.go index ef39a52e5..266cb971b 100644 --- a/tests/e2e/etcd_config_test.go +++ b/tests/e2e/etcd_config_test.go @@ -29,7 +29,7 @@ const exampleConfigFile = "../../etcd.conf.yml.sample" func TestEtcdExampleConfig(t *testing.T) { skipInShortMode(t) - proc, err := spawnCmd([]string{binDir + "/etcd", "--config-file", exampleConfigFile}) + proc, err := spawnCmd([]string{binDir + "/etcd", "--config-file", exampleConfigFile}, nil) if err != nil { t.Fatal(err) } @@ -75,7 +75,7 @@ func TestEtcdMultiPeer(t *testing.T) { "--initial-advertise-peer-urls", fmt.Sprintf("http://127.0.0.1:%d", etcdProcessBasePort+i), "--initial-cluster", ic, } - p, err := spawnCmd(args) + p, err := spawnCmd(args, nil) if err != nil { t.Fatal(err) } @@ -106,7 +106,7 @@ func TestEtcdUnixPeers(t *testing.T) { "--listen-peer-urls", "unix://etcd.unix:1", "--initial-advertise-peer-urls", "unix://etcd.unix:1", "--initial-cluster", "e1=unix://etcd.unix:1", - }, + }, nil, ) defer os.Remove("etcd.unix:1") if err != nil { @@ -183,7 +183,7 @@ func TestEtcdPeerCNAuth(t *testing.T) { commonArgs = append(commonArgs, args...) - p, err := spawnCmd(commonArgs) + p, err := spawnCmd(commonArgs, nil) if err != nil { t.Fatal(err) } @@ -262,7 +262,7 @@ func TestEtcdPeerNameAuth(t *testing.T) { commonArgs = append(commonArgs, args...) 
- p, err := spawnCmd(commonArgs) + p, err := spawnCmd(commonArgs, nil) if err != nil { t.Fatal(err) } @@ -308,7 +308,7 @@ func TestGrpcproxyAndCommonName(t *testing.T) { t.Errorf("Unexpected error: %s", err) } - p, err := spawnCmd(argsWithEmptyCN) + p, err := spawnCmd(argsWithEmptyCN, nil) defer func() { if p != nil { p.Stop() @@ -323,7 +323,7 @@ func TestGrpcproxyAndCommonName(t *testing.T) { func TestBootstrapDefragFlag(t *testing.T) { skipInShortMode(t) - proc, err := spawnCmd([]string{binDir + "/etcd", "--experimental-bootstrap-defrag-threshold-megabytes", "1000"}) + proc, err := spawnCmd([]string{binDir + "/etcd", "--experimental-bootstrap-defrag-threshold-megabytes", "1000"}, nil) if err != nil { t.Fatal(err) } diff --git a/tests/e2e/etcd_corrupt_test.go b/tests/e2e/etcd_corrupt_test.go index 0fd7be373..2b0730b55 100644 --- a/tests/e2e/etcd_corrupt_test.go +++ b/tests/e2e/etcd_corrupt_test.go @@ -87,7 +87,7 @@ func corruptTest(cx ctlCtx) { cx.t.Log("restarting etcd[0]") ep := cx.epc.procs[0] - proc, err := spawnCmd(append([]string{ep.Config().execPath}, ep.Config().args...)) + proc, err := spawnCmd(append([]string{ep.Config().execPath}, ep.Config().args...), cx.envMap) if err != nil { cx.t.Fatal(err) } diff --git a/tests/e2e/etcd_process.go b/tests/e2e/etcd_process.go index f744fa81c..c61001cec 100644 --- a/tests/e2e/etcd_process.go +++ b/tests/e2e/etcd_process.go @@ -92,7 +92,7 @@ func (ep *etcdServerProcess) Start() error { panic("already started") } ep.cfg.lg.Info("starting server...", zap.String("name", ep.cfg.name)) - proc, err := spawnCmdWithLogger(ep.cfg.lg, append([]string{ep.cfg.execPath}, ep.cfg.args...)) + proc, err := spawnCmdWithLogger(ep.cfg.lg, append([]string{ep.cfg.execPath}, ep.cfg.args...), nil) if err != nil { return err } diff --git a/tests/e2e/etcd_spawn_nocov.go b/tests/e2e/etcd_spawn_nocov.go index b0e872fb2..2ed551394 100644 --- a/tests/e2e/etcd_spawn_nocov.go +++ b/tests/e2e/etcd_spawn_nocov.go @@ -18,6 +18,7 @@ package e2e import ( + "fmt" "os" "strings" @@ -27,20 +28,41 @@ import ( const noOutputLineCount = 0 // regular binaries emit no extra lines -func spawnCmd(args []string) (*expect.ExpectProcess, error) { - return spawnCmdWithLogger(zap.NewNop(), args) +func spawnCmd(args []string, envVars map[string]string) (*expect.ExpectProcess, error) { + return spawnCmdWithLogger(zap.NewNop(), args, envVars) } -func spawnCmdWithLogger(lg *zap.Logger, args []string) (*expect.ExpectProcess, error) { +func spawnCmdWithLogger(lg *zap.Logger, args []string, envVars map[string]string) (*expect.ExpectProcess, error) { wd, err := os.Getwd() if err != nil { return nil, err } + env := mergeEnvVariables(envVars) if strings.HasSuffix(args[0], "/etcdctl3") { - env := append(os.Environ(), "ETCDCTL_API=3") - lg.Info("spawning process with ETCDCTL_API=3", zap.Strings("args", args), zap.String("working-dir", wd)) + env = append(env, "ETCDCTL_API=3") + lg.Info("spawning process with ETCDCTL_API=3", zap.Strings("args", args), zap.String("working-dir", wd), zap.Strings("environment-variables", env)) return expect.NewExpectWithEnv(ctlBinPath, args[1:], env) } - lg.Info("spawning process", zap.Strings("args", args), zap.String("working-dir", wd)) - return expect.NewExpect(args[0], args[1:]...) 
+ lg.Info("spawning process", zap.Strings("args", args), zap.String("working-dir", wd), zap.Strings("environment-variables", env)) + return expect.NewExpectWithEnv(args[0], args[1:], env) +} + +func mergeEnvVariables(envVars map[string]string) []string { + var env []string + // Environment variables are passed as parameter have higher priority + // than os environment variables. + for k, v := range envVars { + env = append(env, fmt.Sprintf("%s=%s", k, v)) + } + + // Now, we can set os environment variables not passed as parameter. + currVars := os.Environ() + for _, v := range currVars { + p := strings.Split(v, "=") + if _, ok := envVars[p[0]]; !ok { + env = append(env, fmt.Sprintf("%s=%s", p[0], p[1])) + } + } + + return env } diff --git a/tests/e2e/util.go b/tests/e2e/util.go index 2841d94fb..2aa45bc95 100644 --- a/tests/e2e/util.go +++ b/tests/e2e/util.go @@ -40,16 +40,20 @@ func waitReadyExpectProc(exproc *expect.ExpectProcess, readyStrs []string) error } func spawnWithExpect(args []string, expected string) error { - return spawnWithExpects(args, []string{expected}...) + return spawnWithExpects(args, nil, []string{expected}...) } -func spawnWithExpects(args []string, xs ...string) error { - _, err := spawnWithExpectLines(args, xs...) +func spawnWithExpectWithEnv(args []string, envVars map[string]string, expected string) error { + return spawnWithExpects(args, envVars, []string{expected}...) +} + +func spawnWithExpects(args []string, envVars map[string]string, xs ...string) error { + _, err := spawnWithExpectLines(args, envVars, xs...) return err } -func spawnWithExpectLines(args []string, xs ...string) ([]string, error) { - proc, err := spawnCmd(args) +func spawnWithExpectLines(args []string, envVars map[string]string, xs ...string) ([]string, error) { + proc, err := spawnCmd(args, envVars) if err != nil { return nil, err } diff --git a/tests/e2e/v2store_deprecation_test.go b/tests/e2e/v2store_deprecation_test.go index 06ad555d0..cf6c28200 100644 --- a/tests/e2e/v2store_deprecation_test.go +++ b/tests/e2e/v2store_deprecation_test.go @@ -63,7 +63,7 @@ func assertVerifyCanStartV2deprecationNotYet(t testing.TB, dataDirPath string) { func assertVerifyCannotStartV2deprecationWriteOnly(t testing.TB, dataDirPath string) { t.Log("Verify its infeasible to start etcd with --v2-deprecation=write-only mode") - proc, err := spawnCmd([]string{binDir + "/etcd", "--v2-deprecation=write-only", "--data-dir=" + dataDirPath}) + proc, err := spawnCmd([]string{binDir + "/etcd", "--v2-deprecation=write-only", "--data-dir=" + dataDirPath}, nil) assert.NoError(t, err) _, err = proc.Expect("detected disallowed custom content in v2store for stage --v2-deprecation=write-only") @@ -90,7 +90,7 @@ func TestV2Deprecation(t *testing.T) { func TestV2DeprecationWriteOnlyNoV2Api(t *testing.T) { BeforeTest(t) - proc, err := spawnCmd([]string{binDir + "/etcd", "--v2-deprecation=write-only", "--enable-v2"}) + proc, err := spawnCmd([]string{binDir + "/etcd", "--v2-deprecation=write-only", "--enable-v2"}, nil) assert.NoError(t, err) _, err = proc.Expect("--enable-v2 and --v2-deprecation=write-only are mutually exclusive") diff --git a/tests/e2e/v3_curl_test.go b/tests/e2e/v3_curl_test.go index 4a4e828d5..ce4ab5a0c 100644 --- a/tests/e2e/v3_curl_test.go +++ b/tests/e2e/v3_curl_test.go @@ -243,7 +243,7 @@ func testV3CurlAuth(cx ctlCtx) { ) cmdArgs = cURLPrefixArgs(cx.epc, "POST", cURLReq{endpoint: path.Join(p, "/auth/authenticate"), value: string(authreq)}) - proc, err := spawnCmd(cmdArgs) + proc, err := spawnCmd(cmdArgs, 
cx.envMap) testutil.AssertNil(cx.t, err) defer proc.Close() @@ -285,7 +285,7 @@ func testV3CurlCampaign(cx ctlCtx) { endpoint: path.Join(cx.apiPrefix, "/election/campaign"), value: string(cdata), }) - lines, err := spawnWithExpectLines(cargs, `"leader":{"name":"`) + lines, err := spawnWithExpectLines(cargs, cx.envMap, `"leader":{"name":"`) if err != nil { cx.t.Fatalf("failed post campaign request (%s) (%v)", cx.apiPrefix, err) } From 6c8a4fdcc5f84022782893c89cc009c03323badb Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 5 Aug 2021 10:51:31 +0200 Subject: [PATCH 06/43] server: Rename bootstrappedWal variables to bwal to separate it from wal package --- server/etcdserver/bootstrap.go | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index da9cc6da0..2a1b4b92a 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -357,7 +357,7 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID) *bootstrappedRaft { member := cl.MemberByName(cfg.Name) id := member.ID - wal := bootstrapNewWAL(cfg, id, cl.ID()) + bwal := bootstrapNewWAL(cfg, id, cl.ID()) peers := make([]raft.Peer, len(ids)) for i, id := range ids { var ctx []byte @@ -372,66 +372,66 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste zap.String("local-member-id", id.String()), zap.String("cluster-id", cl.ID().String()), ) - s := wal.MemoryStorage() + s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, cl: cl, - config: raftConfig(cfg, uint64(wal.id), s), + config: raftConfig(cfg, uint64(bwal.id), s), peers: peers, storage: s, - wal: wal, + wal: bwal, } } func bootstrapRaftFromWal(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *bootstrappedRaft { - wal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + bwal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) cfg.Logger.Info( "restarting local member", - zap.String("cluster-id", wal.cid.String()), - zap.String("local-member-id", wal.id.String()), - zap.Uint64("commit-index", wal.st.Commit), + zap.String("cluster-id", bwal.cid.String()), + zap.String("local-member-id", bwal.id.String()), + zap.Uint64("commit-index", bwal.st.Commit), ) cl := membership.NewCluster(cfg.Logger) - cl.SetID(wal.id, wal.cid) - s := wal.MemoryStorage() + cl.SetID(bwal.id, bwal.cid) + s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, cl: cl, - config: raftConfig(cfg, uint64(wal.id), s), + config: raftConfig(cfg, uint64(bwal.id), s), storage: s, - wal: wal, + wal: bwal, } } func bootstrapRaftFromWalStandalone(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *bootstrappedRaft { - wal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + bwal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) // discard the previously uncommitted entries - wal.ents = wal.CommitedEntries() - entries := wal.ConfigChangeEntries() + bwal.ents = bwal.CommitedEntries() + entries := bwal.ConfigChangeEntries() // force commit config change entries - wal.AppendAndCommitEntries(entries) + bwal.AppendAndCommitEntries(entries) cfg.Logger.Info( "forcing restart 
member", - zap.String("cluster-id", wal.cid.String()), - zap.String("local-member-id", wal.id.String()), - zap.Uint64("commit-index", wal.st.Commit), + zap.String("cluster-id", bwal.cid.String()), + zap.String("local-member-id", bwal.id.String()), + zap.Uint64("commit-index", bwal.st.Commit), ) cl := membership.NewCluster(cfg.Logger) - cl.SetID(wal.id, wal.cid) - s := wal.MemoryStorage() + cl.SetID(bwal.id, bwal.cid) + s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, cl: cl, - config: raftConfig(cfg, uint64(wal.id), s), + config: raftConfig(cfg, uint64(bwal.id), s), storage: s, - wal: wal, + wal: bwal, } } From aa0c05000319ff6db6519d0cc0f98373f97b3322 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 10:54:51 +0200 Subject: [PATCH 07/43] etcdserver: Add more hierarchy bootstap introducing a separate storage bootstrap step --- server/etcdserver/bootstrap.go | 113 +++++++++++++++++++-------------- server/etcdserver/server.go | 36 +++++------ 2 files changed, 84 insertions(+), 65 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 2a1b4b92a..60ffc5fc6 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -49,7 +49,6 @@ import ( ) func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { - st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) if cfg.MaxRequestBytes > recommendedMaxRequestBytes { cfg.Logger.Warn( @@ -64,9 +63,49 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil { return nil, fmt.Errorf("cannot access data directory: %v", terr) } - - haveWAL := wal.Exist(cfg.WALDir()) ss := bootstrapSnapshot(cfg) + prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.PeerDialTimeout()) + if err != nil { + return nil, err + } + + if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { + return nil, fmt.Errorf("cannot access member directory: %v", terr) + } + + storage, err := bootstrapStorage(cfg, ss, prt) + if err != nil { + return nil, err + } + return &bootstrappedServer{ + prt: prt, + ss: ss, + storage: storage, + }, nil +} + +type bootstrappedServer struct { + storage *bootstrappedStorage + prt http.RoundTripper + ss *snap.Snapshotter +} + +type bootstrappedStorage struct { + cluster *bootstrapedCluster + beHooks *serverstorage.BackendHooks + st v2store.Store + be backend.Backend + ci cindex.ConsistentIndexer +} + +type bootstrapedCluster struct { + raft *bootstrappedRaft + remotes []*membership.Member +} + +func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { + st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) + haveWAL := wal.Exist(cfg.WALDir()) be, ci, beExist, beHooks, err := bootstrapBackend(cfg) if err != nil { @@ -77,19 +116,14 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { be.Close() } }() - - prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.PeerDialTimeout()) - if err != nil { - return nil, err - } - + var c *bootstrapedCluster switch { case !haveWAL && !cfg.NewCluster: - b, err = bootstrapExistingClusterNoWAL(cfg, prt, st, be) + c, err = bootstrapExistingClusterNoWAL(cfg, prt, st, be) case !haveWAL && cfg.NewCluster: - b, err = bootstrapNewClusterNoWAL(cfg, prt, st, be) + c, err = bootstrapNewClusterNoWAL(cfg, prt, st, be) case haveWAL: - b, err = bootstrapWithWAL(cfg, st, 
be, ss, beExist, beHooks, ci) + c, err = bootstrapWithWAL(cfg, st, be, ss, beExist, beHooks, ci) default: be.Close() return nil, fmt.Errorf("unsupported bootstrap config") @@ -97,28 +131,13 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if err != nil { return nil, err } - - if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { - return nil, fmt.Errorf("cannot access member directory: %v", terr) - } - b.prt = prt - b.ci = ci - b.st = st - b.be = be - b.ss = ss - b.beHooks = beHooks - return b, nil -} - -type bootstrappedServer struct { - raft *bootstrappedRaft - remotes []*membership.Member - prt http.RoundTripper - ci cindex.ConsistentIndexer - st v2store.Store - be backend.Backend - ss *snap.Snapshotter - beHooks *serverstorage.BackendHooks + return &bootstrappedStorage{ + cluster: c, + beHooks: beHooks, + st: st, + be: be, + ci: ci, + }, nil } func bootstrapSnapshot(cfg config.ServerConfig) *snap.Snapshotter { @@ -192,7 +211,7 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { return be.Defrag() } -func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrappedServer, error) { +func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { if err := cfg.VerifyJoinExisting(); err != nil { return nil, err } @@ -217,13 +236,13 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) br := bootstrapRaftFromCluster(cfg, cl, nil) cl.SetID(br.wal.id, existingCluster.ID()) - return &bootstrappedServer{ + return &bootstrapedCluster{ raft: br, remotes: remotes, }, nil } -func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrappedServer, error) { +func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { if err := cfg.VerifyBootstrap(); err != nil { return nil, err } @@ -257,13 +276,13 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs()) cl.SetID(br.wal.id, cl.ID()) - return &bootstrappedServer{ + return &bootstrapedCluster{ remotes: nil, raft: br, }, nil } -func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Backend, ss *snap.Snapshotter, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer) (*bootstrappedServer, error) { +func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Backend, ss *snap.Snapshotter, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } @@ -336,22 +355,22 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back cfg.Logger.Info("No snapshot found. 
Recovering WAL from scratch!") } - r := &bootstrappedServer{} + b := &bootstrapedCluster{} if !cfg.ForceNewCluster { - r.raft = bootstrapRaftFromWal(cfg, snapshot) + b.raft = bootstrapRaftFromWal(cfg, snapshot) } else { - r.raft = bootstrapRaftFromWalStandalone(cfg, snapshot) + b.raft = bootstrapRaftFromWalStandalone(cfg, snapshot) } - r.raft.cl.SetStore(st) - r.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - r.raft.cl.Recover(api.UpdateCapability) - if r.raft.cl.Version() != nil && !r.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !beExist { + b.raft.cl.SetStore(st) + b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) + b.raft.cl.Recover(api.UpdateCapability) + if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !beExist { bepath := cfg.BackendPath() os.RemoveAll(bepath) return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) } - return r, nil + return b, nil } func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID) *bootstrappedRaft { diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 4a9d55efa..2453329f8 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -304,12 +304,12 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { defer func() { if err != nil { - b.be.Close() + b.storage.be.Close() } }() - sstats := stats.NewServerStats(cfg.Name, b.raft.wal.id.String()) - lstats := stats.NewLeaderStats(cfg.Logger, b.raft.wal.id.String()) + sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.raft.wal.id.String()) + lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.raft.wal.id.String()) heartbeat := time.Duration(cfg.TickMs) * time.Millisecond srv = &EtcdServer{ @@ -318,28 +318,28 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { lgMu: new(sync.RWMutex), lg: cfg.Logger, errorc: make(chan error, 1), - v2store: b.st, + v2store: b.storage.st, snapshotter: b.ss, - r: *b.raft.newRaftNode(b.ss), - id: b.raft.wal.id, + r: *b.storage.cluster.raft.newRaftNode(b.ss), + id: b.storage.cluster.raft.wal.id, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, - cluster: b.raft.cl, + cluster: b.storage.cluster.raft.cl, stats: sstats, lstats: lstats, SyncTicker: time.NewTicker(500 * time.Millisecond), peerRt: b.prt, - reqIDGen: idutil.NewGenerator(uint16(b.raft.wal.id), time.Now()), + reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.raft.wal.id), time.Now()), AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist}, - consistIndex: b.ci, + consistIndex: b.storage.ci, firstCommitInTerm: notify.NewNotifier(), clusterVersionChanged: notify.NewNotifier(), } - serverID.With(prometheus.Labels{"server_id": b.raft.wal.id.String()}).Set(1) + serverID.With(prometheus.Labels{"server_id": b.storage.cluster.raft.wal.id.String()}).Set(1) srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged) srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster) - srv.be = b.be - srv.beHooks = b.beHooks + srv.be = b.storage.be + srv.beHooks = b.storage.beHooks minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases. 
@@ -403,9 +403,9 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { Logger: cfg.Logger, TLSInfo: cfg.PeerTLSInfo, DialTimeout: cfg.PeerDialTimeout(), - ID: b.raft.wal.id, + ID: b.storage.cluster.raft.wal.id, URLs: cfg.PeerURLs, - ClusterID: b.raft.cl.ID(), + ClusterID: b.storage.cluster.raft.cl.ID(), Raft: srv, Snapshotter: b.ss, ServerStats: sstats, @@ -416,13 +416,13 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { return nil, err } // add all remotes into transport - for _, m := range b.remotes { - if m.ID != b.raft.wal.id { + for _, m := range b.storage.cluster.remotes { + if m.ID != b.storage.cluster.raft.wal.id { tr.AddRemote(m.ID, m.PeerURLs) } } - for _, m := range b.raft.cl.Members() { - if m.ID != b.raft.wal.id { + for _, m := range b.storage.cluster.raft.cl.Members() { + if m.ID != b.storage.cluster.raft.wal.id { tr.AddPeer(m.ID, m.PeerURLs) } } From db06a4ab28e23170f6cefc70e7b40e2c9410d11d Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 11:51:18 +0200 Subject: [PATCH 08/43] server: Move wal bootstrap up the hierarchy --- server/etcdserver/bootstrap.go | 104 ++++++++++++++------------------- server/etcdserver/server.go | 18 +++--- 2 files changed, 52 insertions(+), 70 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 60ffc5fc6..aafb37969 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -101,6 +101,7 @@ type bootstrappedStorage struct { type bootstrapedCluster struct { raft *bootstrappedRaft remotes []*membership.Member + wal *bootstrappedWAL } func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { @@ -123,7 +124,7 @@ func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.Ro case !haveWAL && cfg.NewCluster: c, err = bootstrapNewClusterNoWAL(cfg, prt, st, be) case haveWAL: - c, err = bootstrapWithWAL(cfg, st, be, ss, beExist, beHooks, ci) + c, err = bootstrapClusterWithWAL(cfg, st, be, ss, beExist, beHooks, ci) default: be.Close() return nil, fmt.Errorf("unsupported bootstrap config") @@ -234,11 +235,13 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe cl.SetID(types.ID(0), existingCluster.ID()) cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - br := bootstrapRaftFromCluster(cfg, cl, nil) - cl.SetID(br.wal.id, existingCluster.ID()) + bwal := bootstrapNewWAL(cfg, cl.MemberByName(cfg.Name).ID, cl.ID()) + br := bootstrapRaftFromCluster(cfg, cl, nil, bwal) + cl.SetID(bwal.id, existingCluster.ID()) return &bootstrapedCluster{ raft: br, remotes: remotes, + wal: bwal, }, nil } @@ -274,15 +277,17 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st } cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs()) - cl.SetID(br.wal.id, cl.ID()) + bwal := bootstrapNewWAL(cfg, cl.MemberByName(cfg.Name).ID, cl.ID()) + br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) + cl.SetID(bwal.id, cl.ID()) return &bootstrapedCluster{ remotes: nil, raft: br, + wal: bwal, }, nil } -func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Backend, ss *snap.Snapshotter, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer) (*bootstrapedCluster, error) { +func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Backend, ss 
*snap.Snapshotter, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } @@ -294,7 +299,7 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back if cfg.ShouldDiscover() { cfg.Logger.Warn( "discovery token is ignored since cluster already initialized; valid logs are found", - zap.String("wal-dir", cfg.WALDir()), + zap.String("bwal-dir", cfg.WALDir()), ) } @@ -304,7 +309,7 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back return nil, err } // snapshot files can be orphaned if etcd crashes after writing them but before writing the corresponding - // wal log entries + // bwal log entries snapshot, err := ss.LoadNewestAvailable(walSnaps) if err != nil && err != snap.ErrNoSnapshot { return nil, err @@ -355,12 +360,31 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back cfg.Logger.Info("No snapshot found. Recovering WAL from scratch!") } - b := &bootstrapedCluster{} - if !cfg.ForceNewCluster { - b.raft = bootstrapRaftFromWal(cfg, snapshot) - } else { - b.raft = bootstrapRaftFromWalStandalone(cfg, snapshot) + bwal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + b := &bootstrapedCluster{ + wal: bwal, } + if cfg.ForceNewCluster { + // discard the previously uncommitted entries + bwal.ents = bwal.CommitedEntries() + entries := bwal.ConfigChangeEntries() + // force commit config change entries + bwal.AppendAndCommitEntries(entries) + cfg.Logger.Info( + "forcing restart member", + zap.String("cluster-id", bwal.cid.String()), + zap.String("local-member-id", bwal.id.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } else { + cfg.Logger.Info( + "restarting local member", + zap.String("cluster-id", bwal.cid.String()), + zap.String("local-member-id", bwal.id.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } + b.raft = bootstrapRaftFromWal(cfg, bwal) b.raft.cl.SetStore(st) b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) @@ -373,10 +397,8 @@ func bootstrapWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Back return b, nil } -func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID) *bootstrappedRaft { +func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID, bwal *bootstrappedWAL) *bootstrappedRaft { member := cl.MemberByName(cfg.Name) - id := member.ID - bwal := bootstrapNewWAL(cfg, id, cl.ID()) peers := make([]raft.Peer, len(ids)) for i, id := range ids { var ctx []byte @@ -388,7 +410,7 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste } cfg.Logger.Info( "starting local member", - zap.String("local-member-id", id.String()), + zap.String("local-member-id", member.ID.String()), zap.String("cluster-id", cl.ID().String()), ) s := bwal.MemoryStorage() @@ -396,22 +418,13 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, cl: cl, - config: raftConfig(cfg, uint64(bwal.id), s), + config: raftConfig(cfg, uint64(member.ID), s), peers: peers, storage: s, - wal: bwal, } } -func bootstrapRaftFromWal(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *bootstrappedRaft { - bwal := bootstrapWALFromSnapshot(cfg.Logger, 
cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) - - cfg.Logger.Info( - "restarting local member", - zap.String("cluster-id", bwal.cid.String()), - zap.String("local-member-id", bwal.id.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) +func bootstrapRaftFromWal(cfg config.ServerConfig, bwal *bootstrappedWAL) *bootstrappedRaft { cl := membership.NewCluster(cfg.Logger) cl.SetID(bwal.id, bwal.cid) s := bwal.MemoryStorage() @@ -421,36 +434,6 @@ func bootstrapRaftFromWal(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *b cl: cl, config: raftConfig(cfg, uint64(bwal.id), s), storage: s, - wal: bwal, - } -} - -func bootstrapRaftFromWalStandalone(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *bootstrappedRaft { - bwal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) - - // discard the previously uncommitted entries - bwal.ents = bwal.CommitedEntries() - entries := bwal.ConfigChangeEntries() - // force commit config change entries - bwal.AppendAndCommitEntries(entries) - - cfg.Logger.Info( - "forcing restart member", - zap.String("cluster-id", bwal.cid.String()), - zap.String("local-member-id", bwal.id.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) - - cl := membership.NewCluster(cfg.Logger) - cl.SetID(bwal.id, bwal.cid) - s := bwal.MemoryStorage() - return &bootstrappedRaft{ - lg: cfg.Logger, - heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, - cl: cl, - config: raftConfig(cfg, uint64(bwal.id), s), - storage: s, - wal: bwal, } } @@ -476,10 +459,9 @@ type bootstrappedRaft struct { config *raft.Config cl *membership.RaftCluster storage *raft.MemoryStorage - wal *bootstrappedWAL } -func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter) *raftNode { +func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL) *raftNode { var n raft.Node if len(b.peers) == 0 { n = raft.RestartNode(b.config) @@ -496,7 +478,7 @@ func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter) *raftNode { Node: n, heartbeat: b.heartbeat, raftStorage: b.storage, - storage: NewStorage(b.wal.w, ss), + storage: NewStorage(wal, ss), }, ) } diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 2453329f8..89d043ece 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -308,8 +308,8 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } }() - sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.raft.wal.id.String()) - lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.raft.wal.id.String()) + sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.wal.id.String()) + lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.wal.id.String()) heartbeat := time.Duration(cfg.TickMs) * time.Millisecond srv = &EtcdServer{ @@ -320,21 +320,21 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { errorc: make(chan error, 1), v2store: b.storage.st, snapshotter: b.ss, - r: *b.storage.cluster.raft.newRaftNode(b.ss), - id: b.storage.cluster.raft.wal.id, + r: *b.storage.cluster.raft.newRaftNode(b.ss, b.storage.cluster.wal.w), + id: b.storage.cluster.wal.id, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, cluster: b.storage.cluster.raft.cl, stats: sstats, lstats: lstats, SyncTicker: time.NewTicker(500 * time.Millisecond), peerRt: b.prt, - reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.raft.wal.id), time.Now()), + reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.wal.id), time.Now()), 
AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist}, consistIndex: b.storage.ci, firstCommitInTerm: notify.NewNotifier(), clusterVersionChanged: notify.NewNotifier(), } - serverID.With(prometheus.Labels{"server_id": b.storage.cluster.raft.wal.id.String()}).Set(1) + serverID.With(prometheus.Labels{"server_id": b.storage.cluster.wal.id.String()}).Set(1) srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged) srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster) @@ -403,7 +403,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { Logger: cfg.Logger, TLSInfo: cfg.PeerTLSInfo, DialTimeout: cfg.PeerDialTimeout(), - ID: b.storage.cluster.raft.wal.id, + ID: b.storage.cluster.wal.id, URLs: cfg.PeerURLs, ClusterID: b.storage.cluster.raft.cl.ID(), Raft: srv, @@ -417,12 +417,12 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } // add all remotes into transport for _, m := range b.storage.cluster.remotes { - if m.ID != b.storage.cluster.raft.wal.id { + if m.ID != b.storage.cluster.wal.id { tr.AddRemote(m.ID, m.PeerURLs) } } for _, m := range b.storage.cluster.raft.cl.Members() { - if m.ID != b.storage.cluster.raft.wal.id { + if m.ID != b.storage.cluster.wal.id { tr.AddPeer(m.ID, m.PeerURLs) } } From 6a4ea70aef49db1cd193570048e63b0b694c026f Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 12:19:35 +0200 Subject: [PATCH 09/43] server: Move clusterID and nodeID up the bootstrap hierarchy --- server/etcdserver/bootstrap.go | 86 +++++++++++++++++++--------------- server/etcdserver/server.go | 16 +++---- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index aafb37969..cd63b44a9 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -99,9 +99,10 @@ type bootstrappedStorage struct { } type bootstrapedCluster struct { - raft *bootstrappedRaft - remotes []*membership.Member - wal *bootstrappedWAL + raft *bootstrappedRaft + remotes []*membership.Member + wal *bootstrappedWAL + clusterID, nodeID types.ID } func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { @@ -235,13 +236,16 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe cl.SetID(types.ID(0), existingCluster.ID()) cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - bwal := bootstrapNewWAL(cfg, cl.MemberByName(cfg.Name).ID, cl.ID()) + member := cl.MemberByName(cfg.Name) + bwal := bootstrapNewWAL(cfg, member, cl) br := bootstrapRaftFromCluster(cfg, cl, nil, bwal) - cl.SetID(bwal.id, existingCluster.ID()) + cl.SetID(member.ID, existingCluster.ID()) return &bootstrapedCluster{ - raft: br, - remotes: remotes, - wal: bwal, + raft: br, + remotes: remotes, + wal: bwal, + clusterID: cl.ID(), + nodeID: member.ID, }, nil } @@ -277,13 +281,16 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st } cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - bwal := bootstrapNewWAL(cfg, cl.MemberByName(cfg.Name).ID, cl.ID()) + member := cl.MemberByName(cfg.Name) + bwal := bootstrapNewWAL(cfg, member, cl) br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) - cl.SetID(bwal.id, cl.ID()) + cl.SetID(member.ID, cl.ID()) return &bootstrapedCluster{ - remotes: nil, - raft: br, - wal: bwal, + remotes: nil, + raft: br, + wal: bwal, + clusterID: cl.ID(), + nodeID: 
member.ID, }, nil } @@ -360,31 +367,34 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backe cfg.Logger.Info("No snapshot found. Recovering WAL from scratch!") } - bwal := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + b := &bootstrapedCluster{ - wal: bwal, + wal: bwal, + clusterID: meta.clusterID, + nodeID: meta.nodeID, } if cfg.ForceNewCluster { // discard the previously uncommitted entries bwal.ents = bwal.CommitedEntries() - entries := bwal.ConfigChangeEntries() + entries := bwal.ConfigChangeEntries(meta) // force commit config change entries bwal.AppendAndCommitEntries(entries) cfg.Logger.Info( "forcing restart member", - zap.String("cluster-id", bwal.cid.String()), - zap.String("local-member-id", bwal.id.String()), + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), zap.Uint64("commit-index", bwal.st.Commit), ) } else { cfg.Logger.Info( "restarting local member", - zap.String("cluster-id", bwal.cid.String()), - zap.String("local-member-id", bwal.id.String()), + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), zap.Uint64("commit-index", bwal.st.Commit), ) } - b.raft = bootstrapRaftFromWal(cfg, bwal) + b.raft = bootstrapRaftFromSnapshot(cfg, bwal, meta) b.raft.cl.SetStore(st) b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) @@ -424,15 +434,15 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste } } -func bootstrapRaftFromWal(cfg config.ServerConfig, bwal *bootstrappedWAL) *bootstrappedRaft { +func bootstrapRaftFromSnapshot(cfg config.ServerConfig, bwal *bootstrappedWAL, meta *snapshotMetadata) *bootstrappedRaft { cl := membership.NewCluster(cfg.Logger) - cl.SetID(bwal.id, bwal.cid) + cl.SetID(meta.nodeID, meta.clusterID) s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, cl: cl, - config: raftConfig(cfg, uint64(bwal.id), s), + config: raftConfig(cfg, uint64(meta.nodeID), s), storage: s, } } @@ -486,7 +496,7 @@ func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL) *raft // bootstrapWALFromSnapshot reads the WAL at the given snap and returns the wal, its latest HardState and cluster ID, and all entries that appear // after the position of the given snap in the WAL. // The snap must have been previously saved to the WAL, or this call will panic. 
-func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Snapshot, unsafeNoFsync bool) *bootstrappedWAL { +func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Snapshot, unsafeNoFsync bool) (*bootstrappedWAL, *snapshotMetadata) { var walsnap walpb.Snapshot if snapshot != nil { walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term @@ -519,23 +529,26 @@ func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Sn pbutil.MustUnmarshal(&metadata, wmetadata) id := types.ID(metadata.NodeID) cid := types.ID(metadata.ClusterID) + meta := &snapshotMetadata{clusterID: cid, nodeID: id} return &bootstrappedWAL{ lg: lg, w: w, - id: id, - cid: cid, st: &st, ents: ents, snapshot: snapshot, - } + }, meta } } -func bootstrapNewWAL(cfg config.ServerConfig, nodeID, clusterID types.ID) *bootstrappedWAL { +type snapshotMetadata struct { + nodeID, clusterID types.ID +} + +func bootstrapNewWAL(cfg config.ServerConfig, m *membership.Member, cl *membership.RaftCluster) *bootstrappedWAL { metadata := pbutil.MustMarshal( &etcdserverpb.Metadata{ - NodeID: uint64(nodeID), - ClusterID: uint64(clusterID), + NodeID: uint64(m.ID), + ClusterID: uint64(cl.ID()), }, ) w, err := wal.Create(cfg.Logger, cfg.WALDir(), metadata) @@ -546,10 +559,8 @@ func bootstrapNewWAL(cfg config.ServerConfig, nodeID, clusterID types.ID) *boots w.SetUnsafeNoFsync() } return &bootstrappedWAL{ - lg: cfg.Logger, - w: w, - id: nodeID, - cid: clusterID, + lg: cfg.Logger, + w: w, } } @@ -557,7 +568,6 @@ type bootstrappedWAL struct { lg *zap.Logger w *wal.WAL - id, cid types.ID st *raftpb.HardState ents []raftpb.Entry snapshot *raftpb.Snapshot @@ -592,11 +602,11 @@ func (wal *bootstrappedWAL) CommitedEntries() []raftpb.Entry { return wal.ents } -func (wal *bootstrappedWAL) ConfigChangeEntries() []raftpb.Entry { +func (wal *bootstrappedWAL) ConfigChangeEntries(meta *snapshotMetadata) []raftpb.Entry { return serverstorage.CreateConfigChangeEnts( wal.lg, serverstorage.GetIDs(wal.lg, wal.snapshot, wal.ents), - uint64(wal.id), + uint64(meta.nodeID), wal.st.Term, wal.st.Commit, ) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 89d043ece..8923fd835 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -308,8 +308,8 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } }() - sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.wal.id.String()) - lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.wal.id.String()) + sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.nodeID.String()) + lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.nodeID.String()) heartbeat := time.Duration(cfg.TickMs) * time.Millisecond srv = &EtcdServer{ @@ -321,20 +321,20 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { v2store: b.storage.st, snapshotter: b.ss, r: *b.storage.cluster.raft.newRaftNode(b.ss, b.storage.cluster.wal.w), - id: b.storage.cluster.wal.id, + id: b.storage.cluster.nodeID, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, cluster: b.storage.cluster.raft.cl, stats: sstats, lstats: lstats, SyncTicker: time.NewTicker(500 * time.Millisecond), peerRt: b.prt, - reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.wal.id), time.Now()), + reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.nodeID), time.Now()), AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist}, 
consistIndex: b.storage.ci, firstCommitInTerm: notify.NewNotifier(), clusterVersionChanged: notify.NewNotifier(), } - serverID.With(prometheus.Labels{"server_id": b.storage.cluster.wal.id.String()}).Set(1) + serverID.With(prometheus.Labels{"server_id": b.storage.cluster.nodeID.String()}).Set(1) srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged) srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster) @@ -403,7 +403,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { Logger: cfg.Logger, TLSInfo: cfg.PeerTLSInfo, DialTimeout: cfg.PeerDialTimeout(), - ID: b.storage.cluster.wal.id, + ID: b.storage.cluster.nodeID, URLs: cfg.PeerURLs, ClusterID: b.storage.cluster.raft.cl.ID(), Raft: srv, @@ -417,12 +417,12 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } // add all remotes into transport for _, m := range b.storage.cluster.remotes { - if m.ID != b.storage.cluster.wal.id { + if m.ID != b.storage.cluster.nodeID { tr.AddRemote(m.ID, m.PeerURLs) } } for _, m := range b.storage.cluster.raft.cl.Members() { - if m.ID != b.storage.cluster.wal.id { + if m.ID != b.storage.cluster.nodeID { tr.AddPeer(m.ID, m.PeerURLs) } } From 648bac833feb1011cd571685b19bf1fd3da3b6d9 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 12:23:23 +0200 Subject: [PATCH 10/43] server: Move bootstrappedRaft up in file --- server/etcdserver/bootstrap.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index cd63b44a9..26f478583 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -105,6 +105,16 @@ type bootstrapedCluster struct { clusterID, nodeID types.ID } +type bootstrappedRaft struct { + lg *zap.Logger + heartbeat time.Duration + + peers []raft.Peer + config *raft.Config + cl *membership.RaftCluster + storage *raft.MemoryStorage +} + func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) haveWAL := wal.Exist(cfg.WALDir()) @@ -461,16 +471,6 @@ func raftConfig(cfg config.ServerConfig, id uint64, s *raft.MemoryStorage) *raft } } -type bootstrappedRaft struct { - lg *zap.Logger - heartbeat time.Duration - - peers []raft.Peer - config *raft.Config - cl *membership.RaftCluster - storage *raft.MemoryStorage -} - func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL) *raftNode { var n raft.Node if len(b.peers) == 0 { From c97ab8f5e01b8ccb228b85bc23b357ed23a0984e Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 12:53:42 +0200 Subject: [PATCH 11/43] server: Move cluster up the bootstrap hierarchy --- server/etcdserver/bootstrap.go | 71 ++++++++++++++++++---------------- server/etcdserver/server.go | 26 ++++++------- 2 files changed, 51 insertions(+), 46 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 26f478583..4b1859487 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -77,25 +77,33 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if err != nil { return nil, err } + + cluster, err := bootstrapCluster(cfg, storage, prt, ss) + if err != nil { + storage.be.Close() + return nil, err + } return &bootstrappedServer{ prt: prt, ss: ss, storage: storage, + cluster: cluster, }, nil } type bootstrappedServer struct { storage 
*bootstrappedStorage + cluster *bootstrapedCluster prt http.RoundTripper ss *snap.Snapshotter } type bootstrappedStorage struct { - cluster *bootstrapedCluster beHooks *serverstorage.BackendHooks st v2store.Store be backend.Backend ci cindex.ConsistentIndexer + beExist bool } type bootstrapedCluster struct { @@ -117,38 +125,17 @@ type bootstrappedRaft struct { func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) - haveWAL := wal.Exist(cfg.WALDir()) be, ci, beExist, beHooks, err := bootstrapBackend(cfg) if err != nil { return nil, err } - defer func() { - if err != nil { - be.Close() - } - }() - var c *bootstrapedCluster - switch { - case !haveWAL && !cfg.NewCluster: - c, err = bootstrapExistingClusterNoWAL(cfg, prt, st, be) - case !haveWAL && cfg.NewCluster: - c, err = bootstrapNewClusterNoWAL(cfg, prt, st, be) - case haveWAL: - c, err = bootstrapClusterWithWAL(cfg, st, be, ss, beExist, beHooks, ci) - default: - be.Close() - return nil, fmt.Errorf("unsupported bootstrap config") - } - if err != nil { - return nil, err - } return &bootstrappedStorage{ - cluster: c, beHooks: beHooks, st: st, be: be, ci: ci, + beExist: beExist, }, nil } @@ -223,6 +210,24 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { return be.Defrag() } +func bootstrapCluster(cfg config.ServerConfig, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { + haveWAL := wal.Exist(cfg.WALDir()) + switch { + case !haveWAL && !cfg.NewCluster: + c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.be) + case !haveWAL && cfg.NewCluster: + c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.be) + case haveWAL: + c, err = bootstrapClusterWithWAL(cfg, storage, ss) + default: + return nil, fmt.Errorf("unsupported bootstrap config") + } + if err != nil { + return nil, err + } + return c, nil +} + func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { if err := cfg.VerifyJoinExisting(); err != nil { return nil, err @@ -304,7 +309,7 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st }, nil } -func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backend.Backend, ss *snap.Snapshotter, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer) (*bootstrapedCluster, error) { +func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage, ss *snap.Snapshotter) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } @@ -333,11 +338,11 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backe } if snapshot != nil { - if err = st.Recovery(snapshot.Data); err != nil { + if err = storage.st.Recovery(snapshot.Data); err != nil { cfg.Logger.Panic("failed to recover from snapshot", zap.Error(err)) } - if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, st, cfg.V2Deprecation); err != nil { + if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, storage.st, cfg.V2Deprecation); err != nil { cfg.Logger.Error("illegal v2store content", zap.Error(err)) return nil, err } @@ -348,10 +353,10 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backe 
zap.String("snapshot-size", humanize.Bytes(uint64(snapshot.Size()))), ) - if be, err = serverstorage.RecoverSnapshotBackend(cfg, be, *snapshot, beExist, beHooks); err != nil { + if storage.be, err = serverstorage.RecoverSnapshotBackend(cfg, storage.be, *snapshot, storage.beExist, storage.beHooks); err != nil { cfg.Logger.Panic("failed to recover v3 backend from snapshot", zap.Error(err)) } - s1, s2 := be.Size(), be.SizeInUse() + s1, s2 := storage.be.Size(), storage.be.SizeInUse() cfg.Logger.Info( "recovered v3 backend from snapshot", zap.Int64("backend-size-bytes", s1), @@ -359,10 +364,10 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backe zap.Int64("backend-size-in-use-bytes", s2), zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))), ) - if beExist { + if storage.beExist { // TODO: remove kvindex != 0 checking when we do not expect users to upgrade // etcd from pre-3.0 release. - kvindex := ci.ConsistentIndex() + kvindex := storage.ci.ConsistentIndex() if kvindex < snapshot.Metadata.Index { if kvindex != 0 { return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index) @@ -406,10 +411,10 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, st v2store.Store, be backe } b.raft = bootstrapRaftFromSnapshot(cfg, bwal, meta) - b.raft.cl.SetStore(st) - b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) + b.raft.cl.SetStore(storage.st) + b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.be)) b.raft.cl.Recover(api.UpdateCapability) - if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !beExist { + if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !storage.beExist { bepath := cfg.BackendPath() os.RemoveAll(bepath) return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 8923fd835..d99491c05 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -308,8 +308,8 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } }() - sstats := stats.NewServerStats(cfg.Name, b.storage.cluster.nodeID.String()) - lstats := stats.NewLeaderStats(cfg.Logger, b.storage.cluster.nodeID.String()) + sstats := stats.NewServerStats(cfg.Name, b.cluster.nodeID.String()) + lstats := stats.NewLeaderStats(cfg.Logger, b.cluster.nodeID.String()) heartbeat := time.Duration(cfg.TickMs) * time.Millisecond srv = &EtcdServer{ @@ -320,21 +320,21 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { errorc: make(chan error, 1), v2store: b.storage.st, snapshotter: b.ss, - r: *b.storage.cluster.raft.newRaftNode(b.ss, b.storage.cluster.wal.w), - id: b.storage.cluster.nodeID, + r: *b.cluster.raft.newRaftNode(b.ss, b.cluster.wal.w), + id: b.cluster.nodeID, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, - cluster: b.storage.cluster.raft.cl, + cluster: b.cluster.raft.cl, stats: sstats, lstats: lstats, SyncTicker: time.NewTicker(500 * time.Millisecond), peerRt: b.prt, - reqIDGen: idutil.NewGenerator(uint16(b.storage.cluster.nodeID), time.Now()), + reqIDGen: idutil.NewGenerator(uint16(b.cluster.nodeID), time.Now()), AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist}, consistIndex: b.storage.ci, firstCommitInTerm: notify.NewNotifier(), clusterVersionChanged: 
notify.NewNotifier(), } - serverID.With(prometheus.Labels{"server_id": b.storage.cluster.nodeID.String()}).Set(1) + serverID.With(prometheus.Labels{"server_id": b.cluster.nodeID.String()}).Set(1) srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged) srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster) @@ -403,9 +403,9 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { Logger: cfg.Logger, TLSInfo: cfg.PeerTLSInfo, DialTimeout: cfg.PeerDialTimeout(), - ID: b.storage.cluster.nodeID, + ID: b.cluster.nodeID, URLs: cfg.PeerURLs, - ClusterID: b.storage.cluster.raft.cl.ID(), + ClusterID: b.cluster.raft.cl.ID(), Raft: srv, Snapshotter: b.ss, ServerStats: sstats, @@ -416,13 +416,13 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { return nil, err } // add all remotes into transport - for _, m := range b.storage.cluster.remotes { - if m.ID != b.storage.cluster.nodeID { + for _, m := range b.cluster.remotes { + if m.ID != b.cluster.nodeID { tr.AddRemote(m.ID, m.PeerURLs) } } - for _, m := range b.storage.cluster.raft.cl.Members() { - if m.ID != b.storage.cluster.nodeID { + for _, m := range b.cluster.raft.cl.Members() { + if m.ID != b.cluster.nodeID { tr.AddPeer(m.ID, m.PeerURLs) } } From 0211f5a2e80e585761829c93e5012750c9dc0689 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 6 Aug 2021 17:53:23 +0200 Subject: [PATCH 12/43] server: Move snapshot recovery to separate function --- server/etcdserver/bootstrap.go | 116 +++++++++++++++++---------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 4b1859487..d37af2190 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -324,64 +324,10 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora zap.String("bwal-dir", cfg.WALDir()), ) } - - // Find a snapshot to start/restart a raft node - walSnaps, err := wal.ValidSnapshotEntries(cfg.Logger, cfg.WALDir()) + snapshot, err := recoverSnapshot(cfg, storage, ss) if err != nil { return nil, err } - // snapshot files can be orphaned if etcd crashes after writing them but before writing the corresponding - // bwal log entries - snapshot, err := ss.LoadNewestAvailable(walSnaps) - if err != nil && err != snap.ErrNoSnapshot { - return nil, err - } - - if snapshot != nil { - if err = storage.st.Recovery(snapshot.Data); err != nil { - cfg.Logger.Panic("failed to recover from snapshot", zap.Error(err)) - } - - if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, storage.st, cfg.V2Deprecation); err != nil { - cfg.Logger.Error("illegal v2store content", zap.Error(err)) - return nil, err - } - - cfg.Logger.Info( - "recovered v2 store from snapshot", - zap.Uint64("snapshot-index", snapshot.Metadata.Index), - zap.String("snapshot-size", humanize.Bytes(uint64(snapshot.Size()))), - ) - - if storage.be, err = serverstorage.RecoverSnapshotBackend(cfg, storage.be, *snapshot, storage.beExist, storage.beHooks); err != nil { - cfg.Logger.Panic("failed to recover v3 backend from snapshot", zap.Error(err)) - } - s1, s2 := storage.be.Size(), storage.be.SizeInUse() - cfg.Logger.Info( - "recovered v3 backend from snapshot", - zap.Int64("backend-size-bytes", s1), - zap.String("backend-size", humanize.Bytes(uint64(s1))), - zap.Int64("backend-size-in-use-bytes", s2), - zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))), - ) - if storage.beExist { - // TODO: remove kvindex != 0 checking when we do not 
expect users to upgrade - // etcd from pre-3.0 release. - kvindex := storage.ci.ConsistentIndex() - if kvindex < snapshot.Metadata.Index { - if kvindex != 0 { - return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index) - } - cfg.Logger.Warn( - "consistent index was never saved", - zap.Uint64("snapshot-index", snapshot.Metadata.Index), - ) - } - } - } else { - cfg.Logger.Info("No snapshot found. Recovering WAL from scratch!") - } - bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) b := &bootstrapedCluster{ @@ -422,6 +368,66 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora return b, nil } +func recoverSnapshot(cfg config.ServerConfig, storage *bootstrappedStorage, ss *snap.Snapshotter) (*raftpb.Snapshot, error) { + // Find a snapshot to start/restart a raft node + walSnaps, err := wal.ValidSnapshotEntries(cfg.Logger, cfg.WALDir()) + if err != nil { + return nil, err + } + // snapshot files can be orphaned if etcd crashes after writing them but before writing the corresponding + // bwal log entries + snapshot, err = ss.LoadNewestAvailable(walSnaps) + if err != nil && err != snap.ErrNoSnapshot { + return nil, err + } + + if snapshot != nil { + if err = storage.st.Recovery(snapshot.Data); err != nil { + cfg.Logger.Panic("failed to recover from snapshot", zap.Error(err)) + } + + if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, storage.st, cfg.V2Deprecation); err != nil { + cfg.Logger.Error("illegal v2store content", zap.Error(err)) + return nil, err + } + + cfg.Logger.Info( + "recovered v2 store from snapshot", + zap.Uint64("snapshot-index", snapshot.Metadata.Index), + zap.String("snapshot-size", humanize.Bytes(uint64(snapshot.Size()))), + ) + + if storage.be, err = serverstorage.RecoverSnapshotBackend(cfg, storage.be, *snapshot, storage.beExist, storage.beHooks); err != nil { + cfg.Logger.Panic("failed to recover v3 backend from snapshot", zap.Error(err)) + } + s1, s2 := be.Size(), be.SizeInUse() + cfg.Logger.Info( + "recovered v3 backend from snapshot", + zap.Int64("backend-size-bytes", s1), + zap.String("backend-size", humanize.Bytes(uint64(s1))), + zap.Int64("backend-size-in-use-bytes", s2), + zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))), + ) + if storage.beExist { + // TODO: remove kvindex != 0 checking when we do not expect users to upgrade + // etcd from pre-3.0 release. + kvindex := storage.ci.ConsistentIndex() + if kvindex < snapshot.Metadata.Index { + if kvindex != 0 { + return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index) + } + cfg.Logger.Warn( + "consistent index was never saved", + zap.Uint64("snapshot-index", snapshot.Metadata.Index), + ) + } + } + } else { + cfg.Logger.Info("No snapshot found. 
Recovering WAL from scratch!") + } + return snapshot, nil +} + func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID, bwal *bootstrappedWAL) *bootstrappedRaft { member := cl.MemberByName(cfg.Name) peers := make([]raft.Peer, len(ids)) From 7c8f7166e738eac7e5f0a1003cc910f66ffe198a Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 13:14:57 +0200 Subject: [PATCH 13/43] server: Move bootstraping backend from snapshot to bootstrapBackend --- server/etcdserver/bootstrap.go | 106 +++++++++++++++++++-------------- server/etcdserver/server.go | 8 +-- 2 files changed, 64 insertions(+), 50 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index d37af2190..6e6563aee 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -73,14 +73,15 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { return nil, fmt.Errorf("cannot access member directory: %v", terr) } - storage, err := bootstrapStorage(cfg, ss, prt) + haveWAL := wal.Exist(cfg.WALDir()) + storage, err := bootstrapStorage(cfg, haveWAL, ss, prt) if err != nil { return nil, err } - cluster, err := bootstrapCluster(cfg, storage, prt, ss) + cluster, err := bootstrapCluster(cfg, haveWAL, storage, prt, ss) if err != nil { - storage.be.Close() + storage.backend.be.Close() return nil, err } return &bootstrappedServer{ @@ -99,11 +100,16 @@ type bootstrappedServer struct { } type bootstrappedStorage struct { - beHooks *serverstorage.BackendHooks + backend *bootstrappedBackend st v2store.Store - be backend.Backend - ci cindex.ConsistentIndexer - beExist bool +} + +type bootstrappedBackend struct { + beHooks *serverstorage.BackendHooks + be backend.Backend + ci cindex.ConsistentIndexer + beExist bool + snapshot *raftpb.Snapshot } type bootstrapedCluster struct { @@ -123,19 +129,17 @@ type bootstrappedRaft struct { storage *raft.MemoryStorage } -func bootstrapStorage(cfg config.ServerConfig, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { +func bootstrapStorage(cfg config.ServerConfig, haveWAL bool, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) - be, ci, beExist, beHooks, err := bootstrapBackend(cfg) + backend, err := bootstrapBackend(cfg, haveWAL, st, ss) if err != nil { return nil, err } + return &bootstrappedStorage{ - beHooks: beHooks, + backend: backend, st: st, - be: be, - ci: ci, - beExist: beExist, }, nil } @@ -160,11 +164,11 @@ func bootstrapSnapshot(cfg config.ServerConfig) *snap.Snapshotter { return snap.New(cfg.Logger, cfg.SnapDir()) } -func bootstrapBackend(cfg config.ServerConfig) (be backend.Backend, ci cindex.ConsistentIndexer, beExist bool, beHooks *serverstorage.BackendHooks, err error) { - beExist = fileutil.Exist(cfg.BackendPath()) - ci = cindex.NewConsistentIndex(nil) - beHooks = serverstorage.NewBackendHooks(cfg.Logger, ci) - be = serverstorage.OpenBackend(cfg, beHooks) +func bootstrapBackend(cfg config.ServerConfig, haveWAL bool, st v2store.Store, ss *snap.Snapshotter) (backend *bootstrappedBackend, err error) { + beExist := fileutil.Exist(cfg.BackendPath()) + ci := cindex.NewConsistentIndex(nil) + beHooks := serverstorage.NewBackendHooks(cfg.Logger, ci) + be := serverstorage.OpenBackend(cfg, beHooks) defer func() { if err != nil && be != nil { be.Close() @@ -175,20 +179,35 @@ func bootstrapBackend(cfg config.ServerConfig) (be backend.Backend, ci cindex.Co if 
cfg.ExperimentalBootstrapDefragThresholdMegabytes != 0 { err = maybeDefragBackend(cfg, be) if err != nil { - return nil, nil, false, nil, err + return nil, err } } cfg.Logger.Debug("restore consistentIndex", zap.Uint64("index", ci.ConsistentIndex())) // TODO(serathius): Implement schema setup in fresh storage + var ( + snapshot *raftpb.Snapshot + ) + if haveWAL { + snapshot, be, err = recoverSnapshot(cfg, st, be, beExist, beHooks, ci, ss) + if err != nil { + return nil, err + } + } if beExist { err = schema.Validate(cfg.Logger, be.BatchTx()) if err != nil { cfg.Logger.Error("Failed to validate schema", zap.Error(err)) - return nil, nil, false, nil, err + return nil, err } } - return be, ci, beExist, beHooks, nil + return &bootstrappedBackend{ + beHooks: beHooks, + be: be, + ci: ci, + beExist: beExist, + snapshot: snapshot, + }, nil } func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { @@ -210,15 +229,14 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { return be.Defrag() } -func bootstrapCluster(cfg config.ServerConfig, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { - haveWAL := wal.Exist(cfg.WALDir()) +func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { switch { case !haveWAL && !cfg.NewCluster: - c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.be) + c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.backend.be) case !haveWAL && cfg.NewCluster: - c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.be) + c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.backend.be) case haveWAL: - c, err = bootstrapClusterWithWAL(cfg, storage, ss) + c, err = bootstrapClusterWithWAL(cfg, storage) default: return nil, fmt.Errorf("unsupported bootstrap config") } @@ -309,7 +327,7 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st }, nil } -func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage, ss *snap.Snapshotter) (*bootstrapedCluster, error) { +func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } @@ -324,11 +342,7 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora zap.String("bwal-dir", cfg.WALDir()), ) } - snapshot, err := recoverSnapshot(cfg, storage, ss) - if err != nil { - return nil, err - } - bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), snapshot, cfg.UnsafeNoFsync) + bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), storage.backend.snapshot, cfg.UnsafeNoFsync) b := &bootstrapedCluster{ wal: bwal, @@ -358,9 +372,9 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora b.raft = bootstrapRaftFromSnapshot(cfg, bwal, meta) b.raft.cl.SetStore(storage.st) - b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.be)) + b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) b.raft.cl.Recover(api.UpdateCapability) - if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !storage.beExist { + if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && 
!storage.backend.beExist { bepath := cfg.BackendPath() os.RemoveAll(bepath) return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) @@ -368,27 +382,27 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora return b, nil } -func recoverSnapshot(cfg config.ServerConfig, storage *bootstrappedStorage, ss *snap.Snapshotter) (*raftpb.Snapshot, error) { +func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backend, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer, ss *snap.Snapshotter) (*raftpb.Snapshot, backend.Backend, error) { // Find a snapshot to start/restart a raft node walSnaps, err := wal.ValidSnapshotEntries(cfg.Logger, cfg.WALDir()) if err != nil { - return nil, err + return nil, be, err } // snapshot files can be orphaned if etcd crashes after writing them but before writing the corresponding // bwal log entries - snapshot, err = ss.LoadNewestAvailable(walSnaps) + snapshot, err := ss.LoadNewestAvailable(walSnaps) if err != nil && err != snap.ErrNoSnapshot { - return nil, err + return nil, be, err } if snapshot != nil { - if err = storage.st.Recovery(snapshot.Data); err != nil { + if err = st.Recovery(snapshot.Data); err != nil { cfg.Logger.Panic("failed to recover from snapshot", zap.Error(err)) } - if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, storage.st, cfg.V2Deprecation); err != nil { + if err = serverstorage.AssertNoV2StoreContent(cfg.Logger, st, cfg.V2Deprecation); err != nil { cfg.Logger.Error("illegal v2store content", zap.Error(err)) - return nil, err + return nil, be, err } cfg.Logger.Info( @@ -397,7 +411,7 @@ func recoverSnapshot(cfg config.ServerConfig, storage *bootstrappedStorage, ss * zap.String("snapshot-size", humanize.Bytes(uint64(snapshot.Size()))), ) - if storage.be, err = serverstorage.RecoverSnapshotBackend(cfg, storage.be, *snapshot, storage.beExist, storage.beHooks); err != nil { + if be, err = serverstorage.RecoverSnapshotBackend(cfg, be, *snapshot, beExist, beHooks); err != nil { cfg.Logger.Panic("failed to recover v3 backend from snapshot", zap.Error(err)) } s1, s2 := be.Size(), be.SizeInUse() @@ -408,13 +422,13 @@ func recoverSnapshot(cfg config.ServerConfig, storage *bootstrappedStorage, ss * zap.Int64("backend-size-in-use-bytes", s2), zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))), ) - if storage.beExist { + if beExist { // TODO: remove kvindex != 0 checking when we do not expect users to upgrade // etcd from pre-3.0 release. - kvindex := storage.ci.ConsistentIndex() + kvindex := ci.ConsistentIndex() if kvindex < snapshot.Metadata.Index { if kvindex != 0 { - return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index) + return nil, be, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d)", cfg.BackendPath(), kvindex, snapshot.Metadata.Index) } cfg.Logger.Warn( "consistent index was never saved", @@ -425,7 +439,7 @@ func recoverSnapshot(cfg config.ServerConfig, storage *bootstrappedStorage, ss * } else { cfg.Logger.Info("No snapshot found. 
Recovering WAL from scratch!") } - return snapshot, nil + return snapshot, be, nil } func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID, bwal *bootstrappedWAL) *bootstrappedRaft { diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index d99491c05..c13af0cc7 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -304,7 +304,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { defer func() { if err != nil { - b.storage.be.Close() + b.storage.backend.be.Close() } }() @@ -330,7 +330,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { peerRt: b.prt, reqIDGen: idutil.NewGenerator(uint16(b.cluster.nodeID), time.Now()), AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist}, - consistIndex: b.storage.ci, + consistIndex: b.storage.backend.ci, firstCommitInTerm: notify.NewNotifier(), clusterVersionChanged: notify.NewNotifier(), } @@ -338,8 +338,8 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged) srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster) - srv.be = b.storage.be - srv.beHooks = b.storage.beHooks + srv.be = b.storage.backend.be + srv.beHooks = b.storage.backend.beHooks minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases. From 8b0d8ea2afd737a40737b69699d38d9d98acb045 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 13:39:29 +0200 Subject: [PATCH 14/43] server: Move cluster up the bootstrap hierarchy --- server/etcdserver/bootstrap.go | 63 ++++++++++++++++------------------ server/etcdserver/server.go | 10 +++--- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 6e6563aee..fe7c64ea4 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -113,10 +113,11 @@ type bootstrappedBackend struct { } type bootstrapedCluster struct { - raft *bootstrappedRaft - remotes []*membership.Member - wal *bootstrappedWAL - clusterID, nodeID types.ID + raft *bootstrappedRaft + remotes []*membership.Member + wal *bootstrappedWAL + cl *membership.RaftCluster + nodeID types.ID } type bootstrappedRaft struct { @@ -125,7 +126,6 @@ type bootstrappedRaft struct { peers []raft.Peer config *raft.Config - cl *membership.RaftCluster storage *raft.MemoryStorage } @@ -274,11 +274,11 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe br := bootstrapRaftFromCluster(cfg, cl, nil, bwal) cl.SetID(member.ID, existingCluster.ID()) return &bootstrapedCluster{ - raft: br, - remotes: remotes, - wal: bwal, - clusterID: cl.ID(), - nodeID: member.ID, + raft: br, + remotes: remotes, + wal: bwal, + cl: cl, + nodeID: member.ID, }, nil } @@ -319,11 +319,11 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) cl.SetID(member.ID, cl.ID()) return &bootstrapedCluster{ - remotes: nil, - raft: br, - wal: bwal, - clusterID: cl.ID(), - nodeID: member.ID, + remotes: nil, + raft: br, + wal: bwal, + cl: cl, + nodeID: member.ID, }, nil } @@ -343,12 +343,6 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora ) } bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), 
storage.backend.snapshot, cfg.UnsafeNoFsync) - - b := &bootstrapedCluster{ - wal: bwal, - clusterID: meta.clusterID, - nodeID: meta.nodeID, - } if cfg.ForceNewCluster { // discard the previously uncommitted entries bwal.ents = bwal.CommitedEntries() @@ -369,17 +363,24 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora zap.Uint64("commit-index", bwal.st.Commit), ) } - b.raft = bootstrapRaftFromSnapshot(cfg, bwal, meta) + cl := membership.NewCluster(cfg.Logger) + cl.SetID(meta.nodeID, meta.clusterID) + raft := bootstrapRaftFromSnapshot(cfg, bwal, meta) - b.raft.cl.SetStore(storage.st) - b.raft.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) - b.raft.cl.Recover(api.UpdateCapability) - if b.raft.cl.Version() != nil && !b.raft.cl.Version().LessThan(semver.Version{Major: 3}) && !storage.backend.beExist { + cl.SetStore(storage.st) + cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) + cl.Recover(api.UpdateCapability) + if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !storage.backend.beExist { bepath := cfg.BackendPath() os.RemoveAll(bepath) return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) } - return b, nil + return &bootstrapedCluster{ + raft: raft, + wal: bwal, + cl: cl, + nodeID: meta.nodeID, + }, nil } func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backend, beExist bool, beHooks *serverstorage.BackendHooks, ci cindex.ConsistentIndexer, ss *snap.Snapshotter) (*raftpb.Snapshot, backend.Backend, error) { @@ -462,7 +463,6 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, - cl: cl, config: raftConfig(cfg, uint64(member.ID), s), peers: peers, storage: s, @@ -470,13 +470,10 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste } func bootstrapRaftFromSnapshot(cfg config.ServerConfig, bwal *bootstrappedWAL, meta *snapshotMetadata) *bootstrappedRaft { - cl := membership.NewCluster(cfg.Logger) - cl.SetID(meta.nodeID, meta.clusterID) s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, - cl: cl, config: raftConfig(cfg, uint64(meta.nodeID), s), storage: s, } @@ -496,7 +493,7 @@ func raftConfig(cfg config.ServerConfig, id uint64, s *raft.MemoryStorage) *raft } } -func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL) *raftNode { +func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL, cl *membership.RaftCluster) *raftNode { var n raft.Node if len(b.peers) == 0 { n = raft.RestartNode(b.config) @@ -509,7 +506,7 @@ func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL) *raft return newRaftNode( raftNodeConfig{ lg: b.lg, - isIDRemoved: func(id uint64) bool { return b.cl.IsIDRemoved(types.ID(id)) }, + isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) }, Node: n, heartbeat: b.heartbeat, raftStorage: b.storage, diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index c13af0cc7..421f7d059 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -308,7 +308,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { } }() - sstats := stats.NewServerStats(cfg.Name, b.cluster.nodeID.String()) + sstats := stats.NewServerStats(cfg.Name, b.cluster.cl.String()) lstats := 
stats.NewLeaderStats(cfg.Logger, b.cluster.nodeID.String()) heartbeat := time.Duration(cfg.TickMs) * time.Millisecond @@ -320,10 +320,10 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { errorc: make(chan error, 1), v2store: b.storage.st, snapshotter: b.ss, - r: *b.cluster.raft.newRaftNode(b.ss, b.cluster.wal.w), + r: *b.cluster.raft.newRaftNode(b.ss, b.cluster.wal.w, b.cluster.cl), id: b.cluster.nodeID, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, - cluster: b.cluster.raft.cl, + cluster: b.cluster.cl, stats: sstats, lstats: lstats, SyncTicker: time.NewTicker(500 * time.Millisecond), @@ -405,7 +405,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { DialTimeout: cfg.PeerDialTimeout(), ID: b.cluster.nodeID, URLs: cfg.PeerURLs, - ClusterID: b.cluster.raft.cl.ID(), + ClusterID: b.cluster.cl.ID(), Raft: srv, Snapshotter: b.ss, ServerStats: sstats, @@ -421,7 +421,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { tr.AddRemote(m.ID, m.PeerURLs) } } - for _, m := range b.cluster.raft.cl.Members() { + for _, m := range b.cluster.cl.Members() { if m.ID != b.cluster.nodeID { tr.AddPeer(m.ID, m.PeerURLs) } From 5d044563a8cca851d773d58e8905af6ab4cff340 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 13:55:22 +0200 Subject: [PATCH 15/43] server: Move raft and wal up the bootstrap hierarchy --- server/etcdserver/bootstrap.go | 97 +++++++++++++++++----------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index fe7c64ea4..d923a595b 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -233,17 +233,57 @@ func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrapp switch { case !haveWAL && !cfg.NewCluster: c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.backend.be) + if err != nil { + return nil, err + } + c.wal = bootstrapNewWAL(cfg, c) + c.raft = bootstrapRaftFromCluster(cfg, c.cl, nil, c.wal) + c.cl.SetID(c.nodeID, c.cl.ID()) + return c, nil case !haveWAL && cfg.NewCluster: c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.backend.be) + if err != nil { + return nil, err + } + c.wal = bootstrapNewWAL(cfg, c) + c.raft = bootstrapRaftFromCluster(cfg, c.cl, c.cl.MemberIDs(), c.wal) + c.cl.SetID(c.nodeID, c.cl.ID()) + return c, nil case haveWAL: - c, err = bootstrapClusterWithWAL(cfg, storage) + bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), storage.backend.snapshot, cfg.UnsafeNoFsync) + if cfg.ForceNewCluster { + // discard the previously uncommitted entries + bwal.ents = bwal.CommitedEntries() + entries := bwal.ConfigChangeEntries(meta) + // force commit config change entries + bwal.AppendAndCommitEntries(entries) + cfg.Logger.Info( + "forcing restart member", + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } else { + cfg.Logger.Info( + "restarting local member", + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } + c, err = bootstrapClusterWithWAL(cfg, storage, meta) + if err != nil { + return nil, err + } + if err := fileutil.IsDirWriteable(cfg.WALDir()); err != nil { + return nil, fmt.Errorf("cannot write to WAL directory: %v", err) + } + c.raft = 
bootstrapRaftFromSnapshot(cfg, bwal, meta) + c.wal = bwal + return c, nil default: return nil, fmt.Errorf("unsupported bootstrap config") } - if err != nil { - return nil, err - } - return c, nil } func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { @@ -270,13 +310,8 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) member := cl.MemberByName(cfg.Name) - bwal := bootstrapNewWAL(cfg, member, cl) - br := bootstrapRaftFromCluster(cfg, cl, nil, bwal) - cl.SetID(member.ID, existingCluster.ID()) return &bootstrapedCluster{ - raft: br, remotes: remotes, - wal: bwal, cl: cl, nodeID: member.ID, }, nil @@ -315,58 +350,26 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st cl.SetStore(st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) member := cl.MemberByName(cfg.Name) - bwal := bootstrapNewWAL(cfg, member, cl) - br := bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) - cl.SetID(member.ID, cl.ID()) return &bootstrapedCluster{ remotes: nil, - raft: br, - wal: bwal, cl: cl, nodeID: member.ID, }, nil } -func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage) (*bootstrapedCluster, error) { +func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage, meta *snapshotMetadata) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } - if err := fileutil.IsDirWriteable(cfg.WALDir()); err != nil { - return nil, fmt.Errorf("cannot write to WAL directory: %v", err) - } - if cfg.ShouldDiscover() { cfg.Logger.Warn( "discovery token is ignored since cluster already initialized; valid logs are found", zap.String("bwal-dir", cfg.WALDir()), ) } - bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), storage.backend.snapshot, cfg.UnsafeNoFsync) - if cfg.ForceNewCluster { - // discard the previously uncommitted entries - bwal.ents = bwal.CommitedEntries() - entries := bwal.ConfigChangeEntries(meta) - // force commit config change entries - bwal.AppendAndCommitEntries(entries) - cfg.Logger.Info( - "forcing restart member", - zap.String("cluster-id", meta.clusterID.String()), - zap.String("local-member-id", meta.nodeID.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) - } else { - cfg.Logger.Info( - "restarting local member", - zap.String("cluster-id", meta.clusterID.String()), - zap.String("local-member-id", meta.nodeID.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) - } cl := membership.NewCluster(cfg.Logger) cl.SetID(meta.nodeID, meta.clusterID) - raft := bootstrapRaftFromSnapshot(cfg, bwal, meta) - cl.SetStore(storage.st) cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) cl.Recover(api.UpdateCapability) @@ -376,8 +379,6 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) } return &bootstrapedCluster{ - raft: raft, - wal: bwal, cl: cl, nodeID: meta.nodeID, }, nil @@ -566,11 +567,11 @@ type snapshotMetadata struct { nodeID, clusterID types.ID } -func bootstrapNewWAL(cfg config.ServerConfig, m *membership.Member, cl *membership.RaftCluster) *bootstrappedWAL { +func bootstrapNewWAL(cfg config.ServerConfig, cluster *bootstrapedCluster) 
*bootstrappedWAL { metadata := pbutil.MustMarshal( &etcdserverpb.Metadata{ - NodeID: uint64(m.ID), - ClusterID: uint64(cl.ID()), + NodeID: uint64(cluster.nodeID), + ClusterID: uint64(cluster.cl.ID()), }, ) w, err := wal.Create(cfg.Logger, cfg.WALDir(), metadata) From 138afa5be9609a2f3c7cdfbc8c3dc2cd5d24781f Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 14:37:23 +0200 Subject: [PATCH 16/43] server: Split wal, cluster and raft bootstraping --- server/etcdserver/bootstrap.go | 134 +++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 57 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index d923a595b..9d905e61f 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -74,20 +74,20 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { } haveWAL := wal.Exist(cfg.WALDir()) - storage, err := bootstrapStorage(cfg, haveWAL, ss, prt) + s, err := bootstrapStorage(cfg, haveWAL, ss, prt) if err != nil { return nil, err } - cluster, err := bootstrapCluster(cfg, haveWAL, storage, prt, ss) + cluster, err := bootstrapCluster(cfg, haveWAL, s, prt, ss) if err != nil { - storage.backend.be.Close() + s.backend.be.Close() return nil, err } return &bootstrappedServer{ prt: prt, ss: ss, - storage: storage, + storage: s, cluster: cluster, }, nil } @@ -230,60 +230,53 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { } func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { + c = &bootstrapedCluster{} + var ( + meta *snapshotMetadata + bwal *bootstrappedWAL + ) + if haveWAL { + if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil { + return nil, fmt.Errorf("cannot write to WAL directory: %v", err) + } + bwal, meta = bootstrapWALFromSnapshot(cfg, storage.backend.snapshot) + } + switch { case !haveWAL && !cfg.NewCluster: c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.backend.be) if err != nil { return nil, err } - c.wal = bootstrapNewWAL(cfg, c) - c.raft = bootstrapRaftFromCluster(cfg, c.cl, nil, c.wal) - c.cl.SetID(c.nodeID, c.cl.ID()) - return c, nil + c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) case !haveWAL && cfg.NewCluster: c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.backend.be) if err != nil { return nil, err } - c.wal = bootstrapNewWAL(cfg, c) - c.raft = bootstrapRaftFromCluster(cfg, c.cl, c.cl.MemberIDs(), c.wal) - c.cl.SetID(c.nodeID, c.cl.ID()) - return c, nil + c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) case haveWAL: - bwal, meta := bootstrapWALFromSnapshot(cfg.Logger, cfg.WALDir(), storage.backend.snapshot, cfg.UnsafeNoFsync) - if cfg.ForceNewCluster { - // discard the previously uncommitted entries - bwal.ents = bwal.CommitedEntries() - entries := bwal.ConfigChangeEntries(meta) - // force commit config change entries - bwal.AppendAndCommitEntries(entries) - cfg.Logger.Info( - "forcing restart member", - zap.String("cluster-id", meta.clusterID.String()), - zap.String("local-member-id", meta.nodeID.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) - } else { - cfg.Logger.Info( - "restarting local member", - zap.String("cluster-id", meta.clusterID.String()), - zap.String("local-member-id", meta.nodeID.String()), - zap.Uint64("commit-index", bwal.st.Commit), - ) - } c, err = bootstrapClusterWithWAL(cfg, storage, meta) if err != nil { return nil, err } - if err := 
fileutil.IsDirWriteable(cfg.WALDir()); err != nil { - return nil, fmt.Errorf("cannot write to WAL directory: %v", err) - } - c.raft = bootstrapRaftFromSnapshot(cfg, bwal, meta) c.wal = bwal - return c, nil default: return nil, fmt.Errorf("unsupported bootstrap config") } + switch { + case !haveWAL && !cfg.NewCluster: + c.raft = bootstrapRaftFromCluster(cfg, c.cl, nil, c.wal) + c.cl.SetID(c.nodeID, c.cl.ID()) + case !haveWAL && cfg.NewCluster: + c.raft = bootstrapRaftFromCluster(cfg, c.cl, c.cl.MemberIDs(), c.wal) + c.cl.SetID(c.nodeID, c.cl.ID()) + case haveWAL: + c.raft = bootstrapRaftFromSnapshot(cfg, c.wal, meta) + default: + return nil, fmt.Errorf("unsupported bootstrap config") + } + return c, nil } func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { @@ -516,21 +509,54 @@ func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL, cl *m ) } -// bootstrapWALFromSnapshot reads the WAL at the given snap and returns the wal, its latest HardState and cluster ID, and all entries that appear +func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (*bootstrappedWAL, *snapshotMetadata) { + wal, st, ents, snap, meta := openWALFromSnapshot(cfg, snapshot) + bwal := &bootstrappedWAL{ + lg: cfg.Logger, + w: wal, + st: st, + ents: ents, + snapshot: snap, + } + + if cfg.ForceNewCluster { + // discard the previously uncommitted entries + bwal.ents = bwal.CommitedEntries() + entries := bwal.ConfigChangeEntries(meta) + // force commit config change entries + bwal.AppendAndCommitEntries(entries) + cfg.Logger.Info( + "forcing restart member", + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } else { + cfg.Logger.Info( + "restarting local member", + zap.String("cluster-id", meta.clusterID.String()), + zap.String("local-member-id", meta.nodeID.String()), + zap.Uint64("commit-index", bwal.st.Commit), + ) + } + return bwal, meta +} + +// openWALFromSnapshot reads the WAL at the given snap and returns the wal, its latest HardState and cluster ID, and all entries that appear // after the position of the given snap in the WAL. // The snap must have been previously saved to the WAL, or this call will panic. -func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Snapshot, unsafeNoFsync bool) (*bootstrappedWAL, *snapshotMetadata) { +func openWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (*wal.WAL, *raftpb.HardState, []raftpb.Entry, *raftpb.Snapshot, *snapshotMetadata) { var walsnap walpb.Snapshot if snapshot != nil { walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term } repaired := false for { - w, err := wal.Open(lg, waldir, walsnap) + w, err := wal.Open(cfg.Logger, cfg.WALDir(), walsnap) if err != nil { - lg.Fatal("failed to open WAL", zap.Error(err)) + cfg.Logger.Fatal("failed to open WAL", zap.Error(err)) } - if unsafeNoFsync { + if cfg.UnsafeNoFsync { w.SetUnsafeNoFsync() } wmetadata, st, ents, err := w.ReadAll() @@ -538,12 +564,12 @@ func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Sn w.Close() // we can only repair ErrUnexpectedEOF and we never repair twice. 
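// Note: wal.Repair can only recover from a torn write at the tail of the log,
// which surfaces as io.ErrUnexpectedEOF. Any other read error, or a second
// failure after one repair attempt, leaves the WAL contents in doubt, so the
// only safe action is to abort startup.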
if repaired || err != io.ErrUnexpectedEOF { - lg.Fatal("failed to read WAL, cannot be repaired", zap.Error(err)) + cfg.Logger.Fatal("failed to read WAL, cannot be repaired", zap.Error(err)) } - if !wal.Repair(lg, waldir) { - lg.Fatal("failed to repair WAL", zap.Error(err)) + if !wal.Repair(cfg.Logger, cfg.WALDir()) { + cfg.Logger.Fatal("failed to repair WAL", zap.Error(err)) } else { - lg.Info("repaired WAL", zap.Error(err)) + cfg.Logger.Info("repaired WAL", zap.Error(err)) repaired = true } continue @@ -553,13 +579,7 @@ func bootstrapWALFromSnapshot(lg *zap.Logger, waldir string, snapshot *raftpb.Sn id := types.ID(metadata.NodeID) cid := types.ID(metadata.ClusterID) meta := &snapshotMetadata{clusterID: cid, nodeID: id} - return &bootstrappedWAL{ - lg: lg, - w: w, - st: &st, - ents: ents, - snapshot: snapshot, - }, meta + return w, &st, ents, snapshot, meta } } @@ -567,11 +587,11 @@ type snapshotMetadata struct { nodeID, clusterID types.ID } -func bootstrapNewWAL(cfg config.ServerConfig, cluster *bootstrapedCluster) *bootstrappedWAL { +func bootstrapNewWAL(cfg config.ServerConfig, cl *membership.RaftCluster, nodeID types.ID) *bootstrappedWAL { metadata := pbutil.MustMarshal( &etcdserverpb.Metadata{ - NodeID: uint64(cluster.nodeID), - ClusterID: uint64(cluster.cl.ID()), + NodeID: uint64(nodeID), + ClusterID: uint64(cl.ID()), }, ) w, err := wal.Create(cfg.Logger, cfg.WALDir(), metadata) From 049e2d6ec0951f6b4e6be0f7d6c2cc8ebda65c97 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 15:04:28 +0200 Subject: [PATCH 17/43] server: Move raft up the bootstrap hierarchy --- server/etcdserver/bootstrap.go | 55 +++++++++++++++++++--------------- server/etcdserver/server.go | 2 +- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 9d905e61f..9bb3e8155 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -72,7 +72,6 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { return nil, fmt.Errorf("cannot access member directory: %v", terr) } - haveWAL := wal.Exist(cfg.WALDir()) s, err := bootstrapStorage(cfg, haveWAL, ss, prt) if err != nil { @@ -84,17 +83,23 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { s.backend.be.Close() return nil, err } + raft := bootstrapRaft(cfg, haveWAL, cluster.cl, cluster.wal) + if !haveWAL { + cluster.cl.SetID(cluster.nodeID, cluster.cl.ID()) + } return &bootstrappedServer{ prt: prt, ss: ss, storage: s, cluster: cluster, + raft: raft, }, nil } type bootstrappedServer struct { storage *bootstrappedStorage cluster *bootstrapedCluster + raft *bootstrappedRaft prt http.RoundTripper ss *snap.Snapshotter } @@ -113,7 +118,6 @@ type bootstrappedBackend struct { } type bootstrapedCluster struct { - raft *bootstrappedRaft remotes []*membership.Member wal *bootstrappedWAL cl *membership.RaftCluster @@ -232,14 +236,13 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { c = &bootstrapedCluster{} var ( - meta *snapshotMetadata bwal *bootstrappedWAL ) if haveWAL { if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil { return nil, fmt.Errorf("cannot write to WAL directory: %v", err) } - bwal, meta = bootstrapWALFromSnapshot(cfg, 
storage.backend.snapshot) + bwal = bootstrapWALFromSnapshot(cfg, storage.backend.snapshot) } switch { @@ -256,7 +259,7 @@ func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrapp } c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) case haveWAL: - c, err = bootstrapClusterWithWAL(cfg, storage, meta) + c, err = bootstrapClusterWithWAL(cfg, storage, bwal.meta) if err != nil { return nil, err } @@ -264,18 +267,6 @@ func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrapp default: return nil, fmt.Errorf("unsupported bootstrap config") } - switch { - case !haveWAL && !cfg.NewCluster: - c.raft = bootstrapRaftFromCluster(cfg, c.cl, nil, c.wal) - c.cl.SetID(c.nodeID, c.cl.ID()) - case !haveWAL && cfg.NewCluster: - c.raft = bootstrapRaftFromCluster(cfg, c.cl, c.cl.MemberIDs(), c.wal) - c.cl.SetID(c.nodeID, c.cl.ID()) - case haveWAL: - c.raft = bootstrapRaftFromSnapshot(cfg, c.wal, meta) - default: - return nil, fmt.Errorf("unsupported bootstrap config") - } return c, nil } @@ -437,6 +428,20 @@ func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backe return snapshot, be, nil } +func bootstrapRaft(cfg config.ServerConfig, haveWAL bool, cl *membership.RaftCluster, bwal *bootstrappedWAL) *bootstrappedRaft { + switch { + case !haveWAL && !cfg.NewCluster: + return bootstrapRaftFromCluster(cfg, cl, nil, bwal) + case !haveWAL && cfg.NewCluster: + return bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) + case haveWAL: + return bootstrapRaftFromWAL(cfg, bwal) + default: + cfg.Logger.Panic("unsupported bootstrap config") + return nil + } +} + func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluster, ids []types.ID, bwal *bootstrappedWAL) *bootstrappedRaft { member := cl.MemberByName(cfg.Name) peers := make([]raft.Peer, len(ids)) @@ -463,12 +468,12 @@ func bootstrapRaftFromCluster(cfg config.ServerConfig, cl *membership.RaftCluste } } -func bootstrapRaftFromSnapshot(cfg config.ServerConfig, bwal *bootstrappedWAL, meta *snapshotMetadata) *bootstrappedRaft { +func bootstrapRaftFromWAL(cfg config.ServerConfig, bwal *bootstrappedWAL) *bootstrappedRaft { s := bwal.MemoryStorage() return &bootstrappedRaft{ lg: cfg.Logger, heartbeat: time.Duration(cfg.TickMs) * time.Millisecond, - config: raftConfig(cfg, uint64(meta.nodeID), s), + config: raftConfig(cfg, uint64(bwal.meta.nodeID), s), storage: s, } } @@ -509,7 +514,7 @@ func (b *bootstrappedRaft) newRaftNode(ss *snap.Snapshotter, wal *wal.WAL, cl *m ) } -func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (*bootstrappedWAL, *snapshotMetadata) { +func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot) *bootstrappedWAL { wal, st, ents, snap, meta := openWALFromSnapshot(cfg, snapshot) bwal := &bootstrappedWAL{ lg: cfg.Logger, @@ -517,12 +522,13 @@ func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot st: st, ents: ents, snapshot: snap, + meta: meta, } if cfg.ForceNewCluster { // discard the previously uncommitted entries bwal.ents = bwal.CommitedEntries() - entries := bwal.ConfigChangeEntries(meta) + entries := bwal.ConfigChangeEntries() // force commit config change entries bwal.AppendAndCommitEntries(entries) cfg.Logger.Info( @@ -539,7 +545,7 @@ func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot zap.Uint64("commit-index", bwal.st.Commit), ) } - return bwal, meta + return bwal } // openWALFromSnapshot reads the WAL at the given snap and returns the wal, its 
latest HardState and cluster ID, and all entries that appear @@ -614,6 +620,7 @@ type bootstrappedWAL struct { st *raftpb.HardState ents []raftpb.Entry snapshot *raftpb.Snapshot + meta *snapshotMetadata } func (wal *bootstrappedWAL) MemoryStorage() *raft.MemoryStorage { @@ -645,11 +652,11 @@ func (wal *bootstrappedWAL) CommitedEntries() []raftpb.Entry { return wal.ents } -func (wal *bootstrappedWAL) ConfigChangeEntries(meta *snapshotMetadata) []raftpb.Entry { +func (wal *bootstrappedWAL) ConfigChangeEntries() []raftpb.Entry { return serverstorage.CreateConfigChangeEnts( wal.lg, serverstorage.GetIDs(wal.lg, wal.snapshot, wal.ents), - uint64(meta.nodeID), + uint64(wal.meta.nodeID), wal.st.Term, wal.st.Commit, ) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 421f7d059..8d55295e1 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -320,7 +320,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { errorc: make(chan error, 1), v2store: b.storage.st, snapshotter: b.ss, - r: *b.cluster.raft.newRaftNode(b.ss, b.cluster.wal.w, b.cluster.cl), + r: *b.raft.newRaftNode(b.ss, b.cluster.wal.w, b.cluster.cl), id: b.cluster.nodeID, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, cluster: b.cluster.cl, From d3abf774eaeffafc2e9b1e881d23e70a65570b2b Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 15:20:56 +0200 Subject: [PATCH 18/43] server: Move cluster backend setting up the call hierarchy --- server/etcdserver/bootstrap.go | 37 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 9bb3e8155..e081e6bd2 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -247,19 +247,19 @@ func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrapp switch { case !haveWAL && !cfg.NewCluster: - c, err = bootstrapExistingClusterNoWAL(cfg, prt, storage.st, storage.backend.be) + c, err = bootstrapExistingClusterNoWAL(cfg, prt) if err != nil { return nil, err } c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) case !haveWAL && cfg.NewCluster: - c, err = bootstrapNewClusterNoWAL(cfg, prt, storage.st, storage.backend.be) + c, err = bootstrapNewClusterNoWAL(cfg, prt) if err != nil { return nil, err } c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) case haveWAL: - c, err = bootstrapClusterWithWAL(cfg, storage, bwal.meta) + c, err = bootstrapClusterWithWAL(cfg, bwal.meta) if err != nil { return nil, err } @@ -267,10 +267,20 @@ func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrapp default: return nil, fmt.Errorf("unsupported bootstrap config") } + c.cl.SetStore(storage.st) + c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) + if haveWAL { + c.cl.Recover(api.UpdateCapability) + if c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3}) && !storage.backend.beExist { + bepath := cfg.BackendPath() + os.RemoveAll(bepath) + return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) + } + } return c, nil } -func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { +func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (*bootstrapedCluster, error) { if err := cfg.VerifyJoinExisting(); err != nil { return nil, err } @@ -291,8 +301,6 
@@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe remotes := existingCluster.Members() cl.SetID(types.ID(0), existingCluster.ID()) - cl.SetStore(st) - cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) member := cl.MemberByName(cfg.Name) return &bootstrapedCluster{ remotes: remotes, @@ -301,7 +309,7 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe }, nil } -func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st v2store.Store, be backend.Backend) (*bootstrapedCluster, error) { +func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (*bootstrapedCluster, error) { if err := cfg.VerifyBootstrap(); err != nil { return nil, err } @@ -331,17 +339,14 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper, st return nil, err } } - cl.SetStore(st) - cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, be)) - member := cl.MemberByName(cfg.Name) return &bootstrapedCluster{ remotes: nil, cl: cl, - nodeID: member.ID, + nodeID: m.ID, }, nil } -func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStorage, meta *snapshotMetadata) (*bootstrapedCluster, error) { +func bootstrapClusterWithWAL(cfg config.ServerConfig, meta *snapshotMetadata) (*bootstrapedCluster, error) { if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil { return nil, fmt.Errorf("cannot write to member directory: %v", err) } @@ -354,14 +359,6 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, storage *bootstrappedStora } cl := membership.NewCluster(cfg.Logger) cl.SetID(meta.nodeID, meta.clusterID) - cl.SetStore(storage.st) - cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) - cl.Recover(api.UpdateCapability) - if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !storage.backend.beExist { - bepath := cfg.BackendPath() - os.RemoveAll(bepath) - return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) - } return &bootstrapedCluster{ cl: cl, nodeID: meta.nodeID, From 4884e7d8cf92d8a6d42b94b20807907ace677738 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 21 Jul 2021 15:57:53 +0200 Subject: [PATCH 19/43] server: Move wal bootstrap from cluster to storage --- server/etcdserver/bootstrap.go | 128 +++++++++++++++++---------------- server/etcdserver/server.go | 2 +- 2 files changed, 69 insertions(+), 61 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index e081e6bd2..0cd73ddb1 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -72,21 +72,42 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { return nil, fmt.Errorf("cannot access member directory: %v", terr) } + haveWAL := wal.Exist(cfg.WALDir()) - s, err := bootstrapStorage(cfg, haveWAL, ss, prt) + st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) + backend, err := bootstrapBackend(cfg, haveWAL, st, ss) if err != nil { return nil, err } + var ( + bwal *bootstrappedWAL + ) + + if haveWAL { + if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil { + return nil, fmt.Errorf("cannot write to WAL directory: %v", err) + } + bwal = bootstrapWALFromSnapshot(cfg, backend.snapshot) + } + + cluster, err := bootstrapCluster(cfg, bwal, prt) + if err != nil { + backend.be.Close() + return nil, err + } - cluster, err := bootstrapCluster(cfg, haveWAL, s, prt, ss) + s, err := 
bootstrapStorage(cfg, st, backend, bwal, cluster) if err != nil { - s.backend.be.Close() + backend.be.Close() return nil, err } - raft := bootstrapRaft(cfg, haveWAL, cluster.cl, cluster.wal) - if !haveWAL { - cluster.cl.SetID(cluster.nodeID, cluster.cl.ID()) + + err = cluster.Finalize(cfg, s) + if err != nil { + backend.be.Close() + return nil, err } + raft := bootstrapRaft(cfg, cluster, s.wal) return &bootstrappedServer{ prt: prt, ss: ss, @@ -106,6 +127,7 @@ type bootstrappedServer struct { type bootstrappedStorage struct { backend *bootstrappedBackend + wal *bootstrappedWAL st v2store.Store } @@ -119,7 +141,6 @@ type bootstrappedBackend struct { type bootstrapedCluster struct { remotes []*membership.Member - wal *bootstrappedWAL cl *membership.RaftCluster nodeID types.ID } @@ -133,17 +154,15 @@ type bootstrappedRaft struct { storage *raft.MemoryStorage } -func bootstrapStorage(cfg config.ServerConfig, haveWAL bool, ss *snap.Snapshotter, prt http.RoundTripper) (b *bootstrappedStorage, err error) { - st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) - - backend, err := bootstrapBackend(cfg, haveWAL, st, ss) - if err != nil { - return nil, err +func bootstrapStorage(cfg config.ServerConfig, st v2store.Store, be *bootstrappedBackend, wal *bootstrappedWAL, cl *bootstrapedCluster) (b *bootstrappedStorage, err error) { + if wal == nil { + wal = bootstrapNewWAL(cfg, cl) } return &bootstrappedStorage{ - backend: backend, + backend: be, st: st, + wal: wal, }, nil } @@ -233,49 +252,19 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error { return be.Defrag() } -func bootstrapCluster(cfg config.ServerConfig, haveWAL bool, storage *bootstrappedStorage, prt http.RoundTripper, ss *snap.Snapshotter) (c *bootstrapedCluster, err error) { - c = &bootstrapedCluster{} - var ( - bwal *bootstrappedWAL - ) - if haveWAL { - if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil { - return nil, fmt.Errorf("cannot write to WAL directory: %v", err) - } - bwal = bootstrapWALFromSnapshot(cfg, storage.backend.snapshot) - } - +func bootstrapCluster(cfg config.ServerConfig, bwal *bootstrappedWAL, prt http.RoundTripper) (c *bootstrapedCluster, err error) { switch { - case !haveWAL && !cfg.NewCluster: + case bwal == nil && !cfg.NewCluster: c, err = bootstrapExistingClusterNoWAL(cfg, prt) - if err != nil { - return nil, err - } - c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) - case !haveWAL && cfg.NewCluster: + case bwal == nil && cfg.NewCluster: c, err = bootstrapNewClusterNoWAL(cfg, prt) - if err != nil { - return nil, err - } - c.wal = bootstrapNewWAL(cfg, c.cl, c.nodeID) - case haveWAL: + case bwal != nil && bwal.haveWAL: c, err = bootstrapClusterWithWAL(cfg, bwal.meta) - if err != nil { - return nil, err - } - c.wal = bwal default: return nil, fmt.Errorf("unsupported bootstrap config") } - c.cl.SetStore(storage.st) - c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, storage.backend.be)) - if haveWAL { - c.cl.Recover(api.UpdateCapability) - if c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3}) && !storage.backend.beExist { - bepath := cfg.BackendPath() - os.RemoveAll(bepath) - return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) - } + if err != nil { + return nil, err } return c, nil } @@ -425,13 +414,30 @@ func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backe return snapshot, be, nil } -func bootstrapRaft(cfg config.ServerConfig, haveWAL bool, cl *membership.RaftCluster, bwal *bootstrappedWAL) 
*bootstrappedRaft { - switch { - case !haveWAL && !cfg.NewCluster: - return bootstrapRaftFromCluster(cfg, cl, nil, bwal) - case !haveWAL && cfg.NewCluster: - return bootstrapRaftFromCluster(cfg, cl, cl.MemberIDs(), bwal) - case haveWAL: +func (c *bootstrapedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedStorage) error { + if !s.wal.haveWAL { + c.cl.SetID(c.nodeID, c.cl.ID()) + } + c.cl.SetStore(s.st) + c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, s.backend.be)) + if s.wal.haveWAL { + c.cl.Recover(api.UpdateCapability) + if c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3}) && !s.backend.beExist { + bepath := cfg.BackendPath() + os.RemoveAll(bepath) + return fmt.Errorf("database file (%v) of the backend is missing", bepath) + } + } + return nil +} + +func bootstrapRaft(cfg config.ServerConfig, cluster *bootstrapedCluster, bwal *bootstrappedWAL) *bootstrappedRaft { + switch { + case !bwal.haveWAL && !cfg.NewCluster: + return bootstrapRaftFromCluster(cfg, cluster.cl, nil, bwal) + case !bwal.haveWAL && cfg.NewCluster: + return bootstrapRaftFromCluster(cfg, cluster.cl, cluster.cl.MemberIDs(), bwal) + case bwal.haveWAL: return bootstrapRaftFromWAL(cfg, bwal) default: cfg.Logger.Panic("unsupported bootstrap config") @@ -520,6 +526,7 @@ func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot ents: ents, snapshot: snap, meta: meta, + haveWAL: true, } if cfg.ForceNewCluster { @@ -590,11 +597,11 @@ type snapshotMetadata struct { nodeID, clusterID types.ID } -func bootstrapNewWAL(cfg config.ServerConfig, cl *membership.RaftCluster, nodeID types.ID) *bootstrappedWAL { +func bootstrapNewWAL(cfg config.ServerConfig, cl *bootstrapedCluster) *bootstrappedWAL { metadata := pbutil.MustMarshal( &etcdserverpb.Metadata{ - NodeID: uint64(nodeID), - ClusterID: uint64(cl.ID()), + NodeID: uint64(cl.nodeID), + ClusterID: uint64(cl.cl.ID()), }, ) w, err := wal.Create(cfg.Logger, cfg.WALDir(), metadata) @@ -613,6 +620,7 @@ func bootstrapNewWAL(cfg config.ServerConfig, cl *membership.RaftCluster, nodeID type bootstrappedWAL struct { lg *zap.Logger + haveWAL bool w *wal.WAL st *raftpb.HardState ents []raftpb.Entry diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 8d55295e1..c0c5aa1a0 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -320,7 +320,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { errorc: make(chan error, 1), v2store: b.storage.st, snapshotter: b.ss, - r: *b.raft.newRaftNode(b.ss, b.cluster.wal.w, b.cluster.cl), + r: *b.raft.newRaftNode(b.ss, b.storage.wal.w, b.cluster.cl), id: b.cluster.nodeID, attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()}, cluster: b.cluster.cl, From a450dc7f91cde674197331c87dd4cfdba24b6947 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 5 Aug 2021 11:53:47 +0200 Subject: [PATCH 20/43] server: Rename function to NewConfigChangeEntries indicating we are not reading it from wal --- server/etcdserver/bootstrap.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 0cd73ddb1..31d6f6408 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -532,7 +532,7 @@ func bootstrapWALFromSnapshot(cfg config.ServerConfig, snapshot *raftpb.Snapshot if cfg.ForceNewCluster { // discard the previously uncommitted entries bwal.ents = bwal.CommitedEntries() - entries := 
bwal.ConfigChangeEntries() + entries := bwal.NewConfigChangeEntries() // force commit config change entries bwal.AppendAndCommitEntries(entries) cfg.Logger.Info( @@ -657,7 +657,7 @@ func (wal *bootstrappedWAL) CommitedEntries() []raftpb.Entry { return wal.ents } -func (wal *bootstrappedWAL) ConfigChangeEntries() []raftpb.Entry { +func (wal *bootstrappedWAL) NewConfigChangeEntries() []raftpb.Entry { return serverstorage.CreateConfigChangeEnts( wal.lg, serverstorage.GetIDs(wal.lg, wal.snapshot, wal.ents), From a206ad2c9636fe850775c86bc6ef63e7fe845168 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 5 Aug 2021 11:55:08 +0200 Subject: [PATCH 21/43] server: Rename GetIDs to GetEffectiveNodeIDsFromWalEntries --- server/etcdserver/bootstrap.go | 2 +- server/etcdserver/raft_test.go | 2 +- server/storage/util.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 31d6f6408..318f1f8e3 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -660,7 +660,7 @@ func (wal *bootstrappedWAL) CommitedEntries() []raftpb.Entry { func (wal *bootstrappedWAL) NewConfigChangeEntries() []raftpb.Entry { return serverstorage.CreateConfigChangeEnts( wal.lg, - serverstorage.GetIDs(wal.lg, wal.snapshot, wal.ents), + serverstorage.GetEffectiveNodeIDsFromWalEntries(wal.lg, wal.snapshot, wal.ents), uint64(wal.meta.nodeID), wal.st.Term, wal.st.Commit, diff --git a/server/etcdserver/raft_test.go b/server/etcdserver/raft_test.go index 49de844b5..f552f8180 100644 --- a/server/etcdserver/raft_test.go +++ b/server/etcdserver/raft_test.go @@ -67,7 +67,7 @@ func TestGetIDs(t *testing.T) { if tt.confState != nil { snap.Metadata.ConfState = *tt.confState } - idSet := serverstorage.GetIDs(testLogger, &snap, tt.ents) + idSet := serverstorage.GetEffectiveNodeIDsFromWalEntries(testLogger, &snap, tt.ents) if !reflect.DeepEqual(idSet, tt.widSet) { t.Errorf("#%d: idset = %#v, want %#v", i, idSet, tt.widSet) } diff --git a/server/storage/util.go b/server/storage/util.go index bdac72ec1..252e74f92 100644 --- a/server/storage/util.go +++ b/server/storage/util.go @@ -109,13 +109,13 @@ func CreateConfigChangeEnts(lg *zap.Logger, ids []uint64, self uint64, term, ind return ents } -// GetIDs returns an ordered set of IDs included in the given snapshot and +// GetEffectiveNodeIDsFromWalEntries returns an ordered set of IDs included in the given snapshot and // the entries. The given snapshot/entries can contain three kinds of // ID-related entry: // - ConfChangeAddNode, in which case the contained ID will Be added into the set. // - ConfChangeRemoveNode, in which case the contained ID will Be removed from the set. // - ConfChangeAddLearnerNode, in which the contained ID will Be added into the set. 
-func GetIDs(lg *zap.Logger, snap *raftpb.Snapshot, ents []raftpb.Entry) []uint64 { +func GetEffectiveNodeIDsFromWalEntries(lg *zap.Logger, snap *raftpb.Snapshot, ents []raftpb.Entry) []uint64 { ids := make(map[uint64]bool) if snap != nil { for _, id := range snap.Metadata.ConfState.Voters { From 39f92a32ca0f32be80bf9845637bf70d2529c015 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 6 Aug 2021 17:21:24 +0200 Subject: [PATCH 22/43] server: Move member dir creation up and introduce Close method to bootstrap structs --- server/etcdserver/bootstrap.go | 28 ++++++++++++++++++++-------- server/etcdserver/server.go | 2 +- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 318f1f8e3..b25684db8 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -63,16 +63,16 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil { return nil, fmt.Errorf("cannot access data directory: %v", terr) } + + if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { + return nil, fmt.Errorf("cannot access member directory: %v", terr) + } ss := bootstrapSnapshot(cfg) prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.PeerDialTimeout()) if err != nil { return nil, err } - if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { - return nil, fmt.Errorf("cannot access member directory: %v", terr) - } - haveWAL := wal.Exist(cfg.WALDir()) st := v2store.New(StoreClusterPrefix, StoreKeysPrefix) backend, err := bootstrapBackend(cfg, haveWAL, st, ss) @@ -92,19 +92,19 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { cluster, err := bootstrapCluster(cfg, bwal, prt) if err != nil { - backend.be.Close() + backend.Close() return nil, err } s, err := bootstrapStorage(cfg, st, backend, bwal, cluster) if err != nil { - backend.be.Close() + backend.Close() return nil, err } err = cluster.Finalize(cfg, s) if err != nil { - backend.be.Close() + backend.Close() return nil, err } raft := bootstrapRaft(cfg, cluster, s.wal) @@ -125,12 +125,20 @@ type bootstrappedServer struct { ss *snap.Snapshotter } +func (s *bootstrappedServer) Close() { + s.storage.Close() +} + type bootstrappedStorage struct { backend *bootstrappedBackend wal *bootstrappedWAL st v2store.Store } +func (s *bootstrappedStorage) Close() { + s.backend.Close() +} + type bootstrappedBackend struct { beHooks *serverstorage.BackendHooks be backend.Backend @@ -139,6 +147,10 @@ type bootstrappedBackend struct { snapshot *raftpb.Snapshot } +func (s *bootstrappedBackend) Close() { + s.be.Close() +} + type bootstrapedCluster struct { remotes []*membership.Member cl *membership.RaftCluster @@ -343,7 +355,7 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, meta *snapshotMetadata) (* if cfg.ShouldDiscover() { cfg.Logger.Warn( "discovery token is ignored since cluster already initialized; valid logs are found", - zap.String("bwal-dir", cfg.WALDir()), + zap.String("wal-dir", cfg.WALDir()), ) } cl := membership.NewCluster(cfg.Logger) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index c0c5aa1a0..310f436e5 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -304,7 +304,7 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) { defer func() { if err != nil { - b.storage.backend.be.Close() + b.Close() } }() From 35db0a581723d240da756468d3a70e283aaf2db6 Mon Sep 17 
00:00:00 2001 From: Marek Siarkowicz Date: Thu, 19 Aug 2021 16:39:33 +0200 Subject: [PATCH 23/43] server: Refactor databaseFileMissing function --- server/etcdserver/bootstrap.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index b25684db8..f1f74fea9 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -434,7 +434,7 @@ func (c *bootstrapedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedSt c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, s.backend.be)) if s.wal.haveWAL { c.cl.Recover(api.UpdateCapability) - if c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3}) && !s.backend.beExist { + if c.databaseFileMissing(s) { bepath := cfg.BackendPath() os.RemoveAll(bepath) return fmt.Errorf("database file (%v) of the backend is missing", bepath) @@ -443,6 +443,11 @@ func (c *bootstrapedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedSt return nil } +func (c *bootstrapedCluster) databaseFileMissing(s *bootstrappedStorage) bool { + v3Cluster := c.cl.Version() != nil && !c.cl.Version().LessThan(semver.Version{Major: 3}) + return v3Cluster && !s.backend.beExist +} + func bootstrapRaft(cfg config.ServerConfig, cluster *bootstrapedCluster, bwal *bootstrappedWAL) *bootstrappedRaft { switch { case !bwal.haveWAL && !cfg.NewCluster: From e7a09db019b8902e5cd4e42e05817791727cceeb Mon Sep 17 00:00:00 2001 From: Sunny Liu Date: Fri, 24 Sep 2021 10:57:30 +0800 Subject: [PATCH 24/43] mvcc: simplify watchable_store addVictim code --- server/storage/mvcc/watchable_store.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/server/storage/mvcc/watchable_store.go b/server/storage/mvcc/watchable_store.go index 3e9606b19..c2a8832db 100644 --- a/server/storage/mvcc/watchable_store.go +++ b/server/storage/mvcc/watchable_store.go @@ -358,7 +358,7 @@ func (s *watchableStore) syncWatchers() int { tx.RUnlock() evs := kvsToEvents(s.store.lg, wg, revs, vs) - var victims watcherBatch + victims := make(watcherBatch) wb := newWatcherBatch(wg, evs) for w := range wg.watchers { w.minRev = curRev + 1 @@ -378,9 +378,6 @@ func (s *watchableStore) syncWatchers() int { if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: curRev}) { pendingEventsGauge.Add(float64(len(eb.evs))) } else { - if victims == nil { - victims = make(watcherBatch) - } w.victim = true } @@ -432,7 +429,7 @@ func kvsToEvents(lg *zap.Logger, wg *watcherGroup, revs, vals [][]byte) (evs []m // notify notifies the fact that given event at the given rev just happened to // watchers that watch on the key of the event. 
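// Watchers that cannot accept the response immediately are marked as victims
// and handed to addVictim so their events are delivered asynchronously later.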
func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) { - var victim watcherBatch + victim := make(watcherBatch) for w, eb := range newWatcherBatch(&s.synced, evs) { if eb.revs != 1 { s.store.lg.Panic( @@ -445,9 +442,6 @@ func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) { } else { // move slow watcher to victims w.minRev = rev + 1 - if victim == nil { - victim = make(watcherBatch) - } w.victim = true victim[w] = eb s.synced.delete(w) @@ -458,7 +452,7 @@ func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) { } func (s *watchableStore) addVictim(victim watcherBatch) { - if victim == nil { + if len(victim) == 0 { return } s.victims = append(s.victims, victim) From b272b98b79a392ac269d5c1577e15b655844ce1a Mon Sep 17 00:00:00 2001 From: Sergey Kacheev Date: Sun, 26 Sep 2021 13:50:10 +0700 Subject: [PATCH 25/43] netutil: make a `raw` URL comparison part of the urlsEqual function --- pkg/netutil/netutil.go | 31 ++++++++++++++------------ pkg/netutil/netutil_test.go | 44 +++++++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/pkg/netutil/netutil.go b/pkg/netutil/netutil.go index 5b1e17966..43c93ba3c 100644 --- a/pkg/netutil/netutil.go +++ b/pkg/netutil/netutil.go @@ -148,20 +148,31 @@ func urlsEqual(ctx context.Context, lg *zap.Logger, a []url.URL, b []url.URL) (b if len(a) != len(b) { return false, fmt.Errorf("len(%q) != len(%q)", urlsToStrings(a), urlsToStrings(b)) } + + sort.Sort(types.URLs(a)) + sort.Sort(types.URLs(b)) + var needResolve bool + for i := range a { + if !reflect.DeepEqual(a[i], b[i]) { + needResolve = true + break + } + } + if !needResolve { + return true, nil + } + + // If URLs are not equal, try to resolve it and compare again. urls, err := resolveTCPAddrs(ctx, lg, [][]url.URL{a, b}) if err != nil { return false, err } - preva, prevb := a, b a, b = urls[0], urls[1] sort.Sort(types.URLs(a)) sort.Sort(types.URLs(b)) for i := range a { if !reflect.DeepEqual(a[i], b[i]) { - return false, fmt.Errorf("%q(resolved from %q) != %q(resolved from %q)", - a[i].String(), preva[i].String(), - b[i].String(), prevb[i].String(), - ) + return false, fmt.Errorf("resolved urls: %q != %q", a[i].String(), b[i].String()) } } return true, nil @@ -188,15 +199,7 @@ func URLStringsEqual(ctx context.Context, lg *zap.Logger, a []string, b []string lg = zap.NewExample() } } - sort.Sort(types.URLs(urlsA)) - sort.Sort(types.URLs(urlsB)) - for i := range urlsA { - if !reflect.DeepEqual(urlsA[i], urlsB[i]) { - // If URLs are not equal, try to resolve it and compare again. 
- return urlsEqual(ctx, lg, urlsA, urlsB) - } - } - return true, nil + return urlsEqual(ctx, lg, urlsA, urlsB) } func urlsToStrings(us []url.URL) []string { diff --git a/pkg/netutil/netutil_test.go b/pkg/netutil/netutil_test.go index 7d1d17aa2..22db427e0 100644 --- a/pkg/netutil/netutil_test.go +++ b/pkg/netutil/netutil_test.go @@ -167,113 +167,133 @@ func TestURLsEqual(t *testing.T) { } tests := []struct { + n int a []url.URL b []url.URL expect bool err error }{ { + n: 0, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}}, b: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}}, expect: true, }, { + n: 1, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}}, b: []url.URL{{Scheme: "http", Host: "10.0.10.1:2379"}}, expect: true, }, { + n: 2, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}}, b: []url.URL{{Scheme: "https", Host: "10.0.10.1:2379"}}, expect: false, - err: errors.New(`"http://10.0.10.1:2379"(resolved from "http://example.com:2379") != "https://10.0.10.1:2379"(resolved from "https://10.0.10.1:2379")`), + err: errors.New(`resolved urls: "http://10.0.10.1:2379" != "https://10.0.10.1:2379"`), }, { + n: 3, a: []url.URL{{Scheme: "https", Host: "example.com:2379"}}, b: []url.URL{{Scheme: "http", Host: "10.0.10.1:2379"}}, expect: false, - err: errors.New(`"https://10.0.10.1:2379"(resolved from "https://example.com:2379") != "http://10.0.10.1:2379"(resolved from "http://10.0.10.1:2379")`), + err: errors.New(`resolved urls: "https://10.0.10.1:2379" != "http://10.0.10.1:2379"`), }, { + n: 4, a: []url.URL{{Scheme: "unix", Host: "abc:2379"}}, b: []url.URL{{Scheme: "unix", Host: "abc:2379"}}, expect: true, }, { + n: 5, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: true, }, { + n: 6, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "example.com:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: true, }, { + n: 7, a: []url.URL{{Scheme: "http", Host: "10.0.10.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "example.com:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: true, }, { + n: 8, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}}, b: []url.URL{{Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, - err: errors.New(`"http://127.0.0.1:2379"(resolved from "http://127.0.0.1:2379") != "http://127.0.0.1:2380"(resolved from "http://127.0.0.1:2380")`), + err: errors.New(`resolved urls: "http://127.0.0.1:2379" != "http://127.0.0.1:2380"`), }, { + n: 9, a: []url.URL{{Scheme: "http", Host: "example.com:2380"}}, b: []url.URL{{Scheme: "http", Host: "10.0.10.1:2379"}}, expect: false, - err: errors.New(`"http://10.0.10.1:2380"(resolved from "http://example.com:2380") != "http://10.0.10.1:2379"(resolved from "http://10.0.10.1:2379")`), + err: errors.New(`resolved urls: "http://10.0.10.1:2380" != "http://10.0.10.1:2379"`), }, { + n: 10, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}}, b: []url.URL{{Scheme: "http", Host: "10.0.0.1:2379"}}, expect: false, - err: errors.New(`"http://127.0.0.1:2379"(resolved from "http://127.0.0.1:2379") != "http://10.0.0.1:2379"(resolved from "http://10.0.0.1:2379")`), + err: errors.New(`resolved urls: "http://127.0.0.1:2379" != "http://10.0.0.1:2379"`), }, { + n: 11, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}}, b: []url.URL{{Scheme: 
"http", Host: "10.0.0.1:2379"}}, expect: false, - err: errors.New(`"http://10.0.10.1:2379"(resolved from "http://example.com:2379") != "http://10.0.0.1:2379"(resolved from "http://10.0.0.1:2379")`), + err: errors.New(`resolved urls: "http://10.0.10.1:2379" != "http://10.0.0.1:2379"`), }, { + n: 12, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "127.0.0.1:2380"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, - err: errors.New(`"http://127.0.0.1:2379"(resolved from "http://127.0.0.1:2379") != "http://127.0.0.1:2380"(resolved from "http://127.0.0.1:2380")`), + err: errors.New(`resolved urls: "http://127.0.0.1:2379" != "http://127.0.0.1:2380"`), }, { + n: 13, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "127.0.0.1:2380"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, - err: errors.New(`"http://10.0.10.1:2379"(resolved from "http://example.com:2379") != "http://127.0.0.1:2380"(resolved from "http://127.0.0.1:2380")`), + err: errors.New(`resolved urls: "http://10.0.10.1:2379" != "http://127.0.0.1:2380"`), }, { + n: 14, a: []url.URL{{Scheme: "http", Host: "127.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "10.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, - err: errors.New(`"http://127.0.0.1:2379"(resolved from "http://127.0.0.1:2379") != "http://10.0.0.1:2379"(resolved from "http://10.0.0.1:2379")`), + err: errors.New(`resolved urls: "http://127.0.0.1:2379" != "http://10.0.0.1:2379"`), }, { + n: 15, a: []url.URL{{Scheme: "http", Host: "example.com:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, b: []url.URL{{Scheme: "http", Host: "10.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, - err: errors.New(`"http://10.0.10.1:2379"(resolved from "http://example.com:2379") != "http://10.0.0.1:2379"(resolved from "http://10.0.0.1:2379")`), + err: errors.New(`resolved urls: "http://10.0.10.1:2379" != "http://10.0.0.1:2379"`), }, { + n: 16, a: []url.URL{{Scheme: "http", Host: "10.0.0.1:2379"}}, b: []url.URL{{Scheme: "http", Host: "10.0.0.1:2379"}, {Scheme: "http", Host: "127.0.0.1:2380"}}, expect: false, err: errors.New(`len(["http://10.0.0.1:2379"]) != len(["http://10.0.0.1:2379" "http://127.0.0.1:2380"])`), }, { + n: 17, a: []url.URL{{Scheme: "http", Host: "first.com:2379"}, {Scheme: "http", Host: "second.com:2380"}}, b: []url.URL{{Scheme: "http", Host: "10.0.11.1:2379"}, {Scheme: "http", Host: "10.0.11.2:2380"}}, expect: true, }, { + n: 18, a: []url.URL{{Scheme: "http", Host: "second.com:2380"}, {Scheme: "http", Host: "first.com:2379"}}, b: []url.URL{{Scheme: "http", Host: "10.0.11.1:2379"}, {Scheme: "http", Host: "10.0.11.2:2380"}}, expect: true, @@ -283,11 +303,11 @@ func TestURLsEqual(t *testing.T) { for i, test := range tests { result, err := urlsEqual(context.TODO(), zap.NewExample(), test.a, test.b) if result != test.expect { - t.Errorf("#%d: a:%v b:%v, expected %v but %v", i, test.a, test.b, test.expect, result) + t.Errorf("idx=%d #%d: a:%v b:%v, expected %v but %v", i, test.n, test.a, test.b, test.expect, result) } if test.err != nil { if err.Error() != test.err.Error() { - t.Errorf("#%d: err expected %v but %v", i, test.err, err) + t.Errorf("idx=%d #%d: err expected %v but %v", i, test.n, test.err, err) } } } From 817d2f40d10adb94adb0f178a6abefc04172eeab Mon Sep 17 00:00:00 2001 From: Geeta Gharpure 
Date: Mon, 27 Sep 2021 17:00:33 -0700 Subject: [PATCH 26/43] storage/backend: Add a gauge to indicate if defrag is active --- server/storage/backend/backend.go | 2 ++ server/storage/backend/metrics.go | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index b7207c171..529f83b0f 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -432,6 +432,8 @@ func (b *backend) Defrag() error { func (b *backend) defrag() error { now := time.Now() + isDefragActive.Set(1) + defer isDefragActive.Set(0) // TODO: make this non-blocking? // lock batchTx to ensure nobody is using previous tx, and then diff --git a/server/storage/backend/metrics.go b/server/storage/backend/metrics.go index d9641af7a..9d58c0063 100644 --- a/server/storage/backend/metrics.go +++ b/server/storage/backend/metrics.go @@ -83,6 +83,13 @@ var ( // highest bucket start of 0.01 sec * 2^16 == 655.36 sec Buckets: prometheus.ExponentialBuckets(.01, 2, 17), }) + + isDefragActive = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "disk", + Name: "defrag_inflight", + Help: "Whether or not defrag is active on the member. 1 means active, 0 means not.", + }) ) func init() { @@ -92,4 +99,5 @@ func init() { prometheus.MustRegister(writeSec) prometheus.MustRegister(defragSec) prometheus.MustRegister(snapshotTransferSec) + prometheus.MustRegister(isDefragActive) } From 391d662f77fb8b17042d15e50ca7713bcc1ee8be Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 22 Sep 2021 15:37:23 +0200 Subject: [PATCH 27/43] tests: Remove bridge dependency on unix --- tests/integration/bridge.go | 28 ++++++++++------------------ tests/integration/cluster.go | 26 +++++++++++++++++++++----- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/integration/bridge.go b/tests/integration/bridge.go index 1d2be109e..22040b882 100644 --- a/tests/integration/bridge.go +++ b/tests/integration/bridge.go @@ -15,20 +15,20 @@ package integration import ( - "fmt" "io" "io/ioutil" "net" "sync" - - "go.etcd.io/etcd/client/pkg/v3/transport" ) -// bridge creates a unix socket bridge to another unix socket, making it possible +type Dialer interface { + Dial() (net.Conn, error) +} + +// bridge proxies connections between listener and dialer, making it possible // to disconnect grpc network connections without closing the logical grpc connection. 
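// A bridge accepts connections on its listener and forwards each one to a
// connection obtained from the Dialer; tests use this to pause or black-hole
// traffic between a client and a member without closing the client.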
type bridge struct { - inaddr string - outaddr string + dialer Dialer l net.Listener conns map[*bridgeConn]struct{} @@ -40,30 +40,22 @@ type bridge struct { mu sync.Mutex } -func newBridge(addr string) (*bridge, error) { +func newBridge(dialer Dialer, listener net.Listener) (*bridge, error) { b := &bridge{ // bridge "port" is ("%05d%05d0", port, pid) since go1.8 expects the port to be a number - inaddr: addr + "0", - outaddr: addr, + dialer: dialer, + l: listener, conns: make(map[*bridgeConn]struct{}), stopc: make(chan struct{}), pausec: make(chan struct{}), blackholec: make(chan struct{}), } close(b.pausec) - - l, err := transport.NewUnixListener(b.inaddr) - if err != nil { - return nil, fmt.Errorf("listen failed on socket %s (%v)", addr, err) - } - b.l = l b.wg.Add(1) go b.serveListen() return b, nil } -func (b *bridge) URL() string { return "unix://" + b.inaddr } - func (b *bridge) Close() { b.l.Close() b.mu.Lock() @@ -127,7 +119,7 @@ func (b *bridge) serveListen() { case <-pausec: } - outc, oerr := net.Dial("unix", b.outaddr) + outc, oerr := b.dialer.Dial() if oerr != nil { inc.Close() return diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index cbf8adacf..f5a391c5f 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -736,20 +736,36 @@ func (m *member) listenGRPC() error { if m.useIP { // for IP-only TLS certs m.grpcAddr = "127.0.0.1:" + m.Name } - l, err := transport.NewUnixListener(m.grpcAddr) + grpcListener, err := transport.NewUnixListener(m.grpcAddr) if err != nil { return fmt.Errorf("listen failed on grpc socket %s (%v)", m.grpcAddr, err) } - m.grpcBridge, err = newBridge(m.grpcAddr) + bridgeAddr := m.grpcAddr + "0" + bridgeListener, err := transport.NewUnixListener(bridgeAddr) if err != nil { - l.Close() + grpcListener.Close() + return fmt.Errorf("listen failed on bridge socket %s (%v)", m.grpcAddr, err) + } + m.grpcBridge, err = newBridge(dialer{network: "unix", addr: m.grpcAddr}, bridgeListener) + if err != nil { + bridgeListener.Close() + grpcListener.Close() return err } - m.grpcAddr = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + m.grpcBridge.inaddr - m.grpcListener = l + m.grpcAddr = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr + m.grpcListener = grpcListener return nil } +type dialer struct { + network string + addr string +} + +func (d dialer) Dial() (net.Conn, error) { + return net.Dial(d.network, d.addr) +} + func (m *member) ElectionTimeout() time.Duration { return time.Duration(m.s.Cfg.ElectionTicks*int(m.s.Cfg.TickMs)) * time.Millisecond } From f2dd5d80a19dc5772b9c5ab84ca8b3d1dc75f67a Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 22 Sep 2021 15:48:13 +0200 Subject: [PATCH 28/43] tests: Rename grpcAddr to grpcURL to imply that it includes schema --- .../clientv3/connectivity/black_hole_test.go | 4 +- .../clientv3/connectivity/dial_test.go | 12 ++--- .../connectivity/network_partition_test.go | 10 ++--- .../connectivity/server_shutdown_test.go | 10 ++--- tests/integration/clientv3/kv_test.go | 6 +-- .../integration/clientv3/maintenance_test.go | 4 +- tests/integration/clientv3/metrics_test.go | 2 +- .../integration/clientv3/ordering_kv_test.go | 16 +++---- .../clientv3/ordering_util_test.go | 24 +++++----- tests/integration/cluster.go | 44 +++++++++---------- .../proxy/grpcproxy/cluster_test.go | 2 +- tests/integration/proxy/grpcproxy/kv_test.go | 2 +- .../proxy/grpcproxy/register_test.go | 2 +- tests/integration/v3_grpc_test.go | 4 +- tests/integration/v3_tls_test.go | 2 +- 15 files changed, 
72 insertions(+), 72 deletions(-) diff --git a/tests/integration/clientv3/connectivity/black_hole_test.go b/tests/integration/clientv3/connectivity/black_hole_test.go index ff56bbd09..a4415322f 100644 --- a/tests/integration/clientv3/connectivity/black_hole_test.go +++ b/tests/integration/clientv3/connectivity/black_hole_test.go @@ -41,7 +41,7 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) { }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()} ccfg := clientv3.Config{ Endpoints: []string{eps[0]}, @@ -173,7 +173,7 @@ func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Clien }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()} ccfg := clientv3.Config{ Endpoints: []string{eps[0]}, diff --git a/tests/integration/clientv3/connectivity/dial_test.go b/tests/integration/clientv3/connectivity/dial_test.go index f02ea61aa..52dcca69e 100644 --- a/tests/integration/clientv3/connectivity/dial_test.go +++ b/tests/integration/clientv3/connectivity/dial_test.go @@ -57,7 +57,7 @@ func TestDialTLSExpired(t *testing.T) { } // expect remote errors "tls: bad certificate" _, err = integration.NewClient(t, clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialTimeout: 3 * time.Second, DialOptions: []grpc.DialOption{grpc.WithBlock()}, TLS: tls, @@ -75,7 +75,7 @@ func TestDialTLSNoConfig(t *testing.T) { defer clus.Terminate(t) // expect "signed by unknown authority" c, err := integration.NewClient(t, clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialTimeout: time.Second, DialOptions: []grpc.DialOption{grpc.WithBlock()}, }) @@ -108,7 +108,7 @@ func testDialSetEndpoints(t *testing.T, setBefore bool) { // get endpoint list eps := make([]string, 3) for i := range eps { - eps[i] = clus.Members[i].GRPCAddr() + eps[i] = clus.Members[i].GRPCURL() } toKill := rand.Intn(len(eps)) @@ -149,7 +149,7 @@ func TestSwitchSetEndpoints(t *testing.T) { defer clus.Terminate(t) // get non partitioned members endpoints - eps := []string{clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} cli := clus.Client(0) clus.Members[0].InjectPartition(t, clus.Members[1:]...) 
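The rename is mechanical, but it makes the accessor's contract explicit: the returned value already carries the scheme, so tests can hand it to clientv3.Config unchanged. A minimal sketch of that pattern, using the integration helpers that appear throughout this patch (the test name is illustrative only):

	func TestEndpointsUseGRPCURL(t *testing.T) {
		integration.BeforeTest(t)
		clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2})
		defer clus.Terminate(t)

		// GRPCURL returns a full client URL, scheme included (derived from the
		// member's client TLS config), so it can be used as an endpoint as-is.
		eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()}
		cli, err := integration.NewClient(t, clientv3.Config{Endpoints: eps})
		if err != nil {
			t.Fatal(err)
		}
		defer cli.Close()
	}
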
@@ -170,7 +170,7 @@ func TestRejectOldCluster(t *testing.T) { defer clus.Terminate(t) cfg := clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()}, DialTimeout: 5 * time.Second, DialOptions: []grpc.DialOption{grpc.WithBlock()}, RejectOldCluster: true, @@ -212,7 +212,7 @@ func TestSetEndpointAndPut(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2}) defer clus.Terminate(t) - clus.Client(1).SetEndpoints(clus.Members[0].GRPCAddr()) + clus.Client(1).SetEndpoints(clus.Members[0].GRPCURL()) _, err := clus.Client(1).Put(context.TODO(), "foo", "bar") if err != nil && !strings.Contains(err.Error(), "closing") { t.Fatal(err) diff --git a/tests/integration/clientv3/connectivity/network_partition_test.go b/tests/integration/clientv3/connectivity/network_partition_test.go index 3db643e42..c2650ebcd 100644 --- a/tests/integration/clientv3/connectivity/network_partition_test.go +++ b/tests/integration/clientv3/connectivity/network_partition_test.go @@ -111,7 +111,7 @@ func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, c }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} // expect pin eps[0] ccfg := clientv3.Config{ @@ -166,7 +166,7 @@ func TestBalancerUnderNetworkPartitionLinearizableGetLeaderElection(t *testing.T SkipCreatingClient: true, }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} lead := clus.WaitLeader(t) @@ -222,7 +222,7 @@ func testBalancerUnderNetworkPartitionWatch(t *testing.T, isolateLeader bool) { }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} target := clus.WaitLeader(t) if !isolateLeader { @@ -283,7 +283,7 @@ func TestDropReadUnderNetworkPartition(t *testing.T) { defer clus.Terminate(t) leaderIndex := clus.WaitLeader(t) // get a follower endpoint - eps := []string{clus.Members[(leaderIndex+1)%3].GRPCAddr()} + eps := []string{clus.Members[(leaderIndex+1)%3].GRPCURL()} ccfg := clientv3.Config{ Endpoints: eps, DialTimeout: 10 * time.Second, @@ -301,7 +301,7 @@ func TestDropReadUnderNetworkPartition(t *testing.T) { // add other endpoints for later endpoint switch cli.SetEndpoints(eps...) 
time.Sleep(time.Second * 2) - conn, err := cli.Dial(clus.Members[(leaderIndex+1)%3].GRPCAddr()) + conn, err := cli.Dial(clus.Members[(leaderIndex+1)%3].GRPCURL()) if err != nil { t.Fatal(err) } diff --git a/tests/integration/clientv3/connectivity/server_shutdown_test.go b/tests/integration/clientv3/connectivity/server_shutdown_test.go index 8ab90cbc5..e7660852b 100644 --- a/tests/integration/clientv3/connectivity/server_shutdown_test.go +++ b/tests/integration/clientv3/connectivity/server_shutdown_test.go @@ -38,7 +38,7 @@ func TestBalancerUnderServerShutdownWatch(t *testing.T) { }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} lead := clus.WaitLeader(t) @@ -150,7 +150,7 @@ func testBalancerUnderServerShutdownMutable(t *testing.T, op func(*clientv3.Clie }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} // pin eps[0] cli, err := integration.NewClient(t, clientv3.Config{Endpoints: []string{eps[0]}}) @@ -208,7 +208,7 @@ func testBalancerUnderServerShutdownImmutable(t *testing.T, op func(*clientv3.Cl }) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL(), clus.Members[2].GRPCURL()} // pin eps[0] cli, err := integration.NewClient(t, clientv3.Config{Endpoints: []string{eps[0]}}) @@ -285,9 +285,9 @@ func testBalancerUnderServerStopInflightRangeOnRestart(t *testing.T, linearizabl clus := integration.NewClusterV3(t, cfg) defer clus.Terminate(t) - eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()} + eps := []string{clus.Members[0].GRPCURL(), clus.Members[1].GRPCURL()} if linearizable { - eps = append(eps, clus.Members[2].GRPCAddr()) + eps = append(eps, clus.Members[2].GRPCURL()) } lead := clus.WaitLeader(t) diff --git a/tests/integration/clientv3/kv_test.go b/tests/integration/clientv3/kv_test.go index fcef1a871..ac694aa21 100644 --- a/tests/integration/clientv3/kv_test.go +++ b/tests/integration/clientv3/kv_test.go @@ -1027,7 +1027,7 @@ func TestKVForLearner(t *testing.T) { // 1. clus.Members[3] is the newly added learner member, which was appended to clus.Members // 2. we are using member's grpcAddr instead of clientURLs as the endpoint for clientv3.Config, // because the implementation of integration test has diverged from embed/etcd.go. - learnerEp := clus.Members[3].GRPCAddr() + learnerEp := clus.Members[3].GRPCURL() cfg := clientv3.Config{ Endpoints: []string{learnerEp}, DialTimeout: 5 * time.Second, @@ -1100,7 +1100,7 @@ func TestBalancerSupportLearner(t *testing.T) { } // clus.Members[3] is the newly added learner member, which was appended to clus.Members - learnerEp := clus.Members[3].GRPCAddr() + learnerEp := clus.Members[3].GRPCURL() cfg := clientv3.Config{ Endpoints: []string{learnerEp}, DialTimeout: 5 * time.Second, @@ -1120,7 +1120,7 @@ func TestBalancerSupportLearner(t *testing.T) { } t.Logf("Expected: Read from learner error: %v", err) - eps := []string{learnerEp, clus.Members[0].GRPCAddr()} + eps := []string{learnerEp, clus.Members[0].GRPCURL()} cli.SetEndpoints(eps...) 
if _, err := cli.Get(context.Background(), "foo"); err != nil { t.Errorf("expect no error (balancer should retry when request to learner fails), got error: %v", err) diff --git a/tests/integration/clientv3/maintenance_test.go b/tests/integration/clientv3/maintenance_test.go index 4bd137d8f..965599583 100644 --- a/tests/integration/clientv3/maintenance_test.go +++ b/tests/integration/clientv3/maintenance_test.go @@ -56,7 +56,7 @@ func TestMaintenanceHashKV(t *testing.T) { if _, err := cli.Get(context.TODO(), "foo"); err != nil { t.Fatal(err) } - hresp, err := cli.HashKV(context.Background(), clus.Members[i].GRPCAddr(), 0) + hresp, err := cli.HashKV(context.Background(), clus.Members[i].GRPCURL(), 0) if err != nil { t.Fatal(err) } @@ -279,7 +279,7 @@ func TestMaintenanceStatus(t *testing.T) { eps := make([]string, 3) for i := 0; i < 3; i++ { - eps[i] = clus.Members[i].GRPCAddr() + eps[i] = clus.Members[i].GRPCURL() } cli, err := integration.NewClient(t, clientv3.Config{Endpoints: eps, DialOptions: []grpc.DialOption{grpc.WithBlock()}}) diff --git a/tests/integration/clientv3/metrics_test.go b/tests/integration/clientv3/metrics_test.go index 494923d3c..4e2202cee 100644 --- a/tests/integration/clientv3/metrics_test.go +++ b/tests/integration/clientv3/metrics_test.go @@ -75,7 +75,7 @@ func TestV3ClientMetrics(t *testing.T) { defer clus.Terminate(t) cfg := clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialOptions: []grpc.DialOption{ grpc.WithUnaryInterceptor(grpcprom.UnaryClientInterceptor), grpc.WithStreamInterceptor(grpcprom.StreamClientInterceptor), diff --git a/tests/integration/clientv3/ordering_kv_test.go b/tests/integration/clientv3/ordering_kv_test.go index b1f4f54ef..fdce92495 100644 --- a/tests/integration/clientv3/ordering_kv_test.go +++ b/tests/integration/clientv3/ordering_kv_test.go @@ -35,9 +35,9 @@ func TestDetectKvOrderViolation(t *testing.T) { cfg := clientv3.Config{ Endpoints: []string{ - clus.Members[0].GRPCAddr(), - clus.Members[1].GRPCAddr(), - clus.Members[2].GRPCAddr(), + clus.Members[0].GRPCURL(), + clus.Members[1].GRPCURL(), + clus.Members[2].GRPCURL(), }, } cli, err := integration.NewClient(t, cfg) @@ -82,7 +82,7 @@ func TestDetectKvOrderViolation(t *testing.T) { clus.Members[1].Stop(t) assert.NoError(t, clus.Members[2].Restart(t)) // force OrderingKv to query the third member - cli.SetEndpoints(clus.Members[2].GRPCAddr()) + cli.SetEndpoints(clus.Members[2].GRPCURL()) time.Sleep(2 * time.Second) // FIXME: Figure out how pause SetEndpoints sufficiently that this is not needed t.Logf("Quering m2 after restart") @@ -102,9 +102,9 @@ func TestDetectTxnOrderViolation(t *testing.T) { cfg := clientv3.Config{ Endpoints: []string{ - clus.Members[0].GRPCAddr(), - clus.Members[1].GRPCAddr(), - clus.Members[2].GRPCAddr(), + clus.Members[0].GRPCURL(), + clus.Members[1].GRPCURL(), + clus.Members[2].GRPCURL(), }, } cli, err := integration.NewClient(t, cfg) @@ -151,7 +151,7 @@ func TestDetectTxnOrderViolation(t *testing.T) { clus.Members[1].Stop(t) assert.NoError(t, clus.Members[2].Restart(t)) // force OrderingKv to query the third member - cli.SetEndpoints(clus.Members[2].GRPCAddr()) + cli.SetEndpoints(clus.Members[2].GRPCURL()) time.Sleep(2 * time.Second) // FIXME: Figure out how pause SetEndpoints sufficiently that this is not needed _, err = orderingKv.Get(ctx, "foo", clientv3.WithSerializable()) if err != errOrderViolation { diff --git a/tests/integration/clientv3/ordering_util_test.go 
b/tests/integration/clientv3/ordering_util_test.go index db3fddd99..85c61f407 100644 --- a/tests/integration/clientv3/ordering_util_test.go +++ b/tests/integration/clientv3/ordering_util_test.go @@ -29,11 +29,11 @@ func TestEndpointSwitchResolvesViolation(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) defer clus.Terminate(t) eps := []string{ - clus.Members[0].GRPCAddr(), - clus.Members[1].GRPCAddr(), - clus.Members[2].GRPCAddr(), + clus.Members[0].GRPCURL(), + clus.Members[1].GRPCURL(), + clus.Members[2].GRPCURL(), } - cfg := clientv3.Config{Endpoints: []string{clus.Members[0].GRPCAddr()}} + cfg := clientv3.Config{Endpoints: []string{clus.Members[0].GRPCURL()}} cli, err := integration.NewClient(t, cfg) if err != nil { t.Fatal(err) @@ -71,7 +71,7 @@ func TestEndpointSwitchResolvesViolation(t *testing.T) { } t.Logf("Reconfigure client to speak only to the 'partitioned' member") - cli.SetEndpoints(clus.Members[2].GRPCAddr()) + cli.SetEndpoints(clus.Members[2].GRPCURL()) _, err = orderingKv.Get(ctx, "foo", clientv3.WithSerializable()) if err != ordering.ErrNoGreaterRev { t.Fatal("While speaking to partitioned leader, we should get ErrNoGreaterRev error") @@ -84,11 +84,11 @@ func TestUnresolvableOrderViolation(t *testing.T) { defer clus.Terminate(t) cfg := clientv3.Config{ Endpoints: []string{ - clus.Members[0].GRPCAddr(), - clus.Members[1].GRPCAddr(), - clus.Members[2].GRPCAddr(), - clus.Members[3].GRPCAddr(), - clus.Members[4].GRPCAddr(), + clus.Members[0].GRPCURL(), + clus.Members[1].GRPCURL(), + clus.Members[2].GRPCURL(), + clus.Members[3].GRPCURL(), + clus.Members[4].GRPCURL(), }, } cli, err := integration.NewClient(t, cfg) @@ -99,7 +99,7 @@ func TestUnresolvableOrderViolation(t *testing.T) { eps := cli.Endpoints() ctx := context.TODO() - cli.SetEndpoints(clus.Members[0].GRPCAddr()) + cli.SetEndpoints(clus.Members[0].GRPCURL()) time.Sleep(1 * time.Second) _, err = cli.Put(ctx, "foo", "bar") if err != nil { @@ -139,7 +139,7 @@ func TestUnresolvableOrderViolation(t *testing.T) { t.Fatal(err) } clus.Members[3].WaitStarted(t) - cli.SetEndpoints(clus.Members[3].GRPCAddr()) + cli.SetEndpoints(clus.Members[3].GRPCURL()) _, err = OrderingKv.Get(ctx, "foo", clientv3.WithSerializable()) if err != ordering.ErrNoGreaterRev { diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index f5a391c5f..c854030a6 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -249,7 +249,7 @@ func (c *cluster) Launch(t testutil.TB) { c.waitMembersMatch(t, c.HTTPMembers()) c.waitVersion() for _, m := range c.Members { - t.Logf(" - %v -> %v (%v)", m.Name, m.ID(), m.GRPCAddr()) + t.Logf(" - %v -> %v (%v)", m.Name, m.ID(), m.GRPCURL()) } } @@ -572,7 +572,7 @@ type member struct { grpcServerOpts []grpc.ServerOption grpcServer *grpc.Server grpcServerPeer *grpc.Server - grpcAddr string + grpcURL string grpcBridge *bridge // serverClient is a clientv3 that directly calls the etcdserver. 
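One ordering detail helps when reading the listenGRPC hunk below: the unix listener is created while grpcURL still holds the bare "localhost:<name>" (or "127.0.0.1:<name>") socket name, and the scheme is only prepended once the bridge listener is in place, so clients end up dialing the bridge socket rather than the raw grpc socket. Condensed from the diff, with the names used in the patch (error handling elided):

	m.grpcURL = "localhost:" + m.Name                                   // bare socket name; this is what the listener binds to
	grpcListener, _ := transport.NewUnixListener(m.grpcURL)
	bridgeAddr := m.grpcURL + "0"                                       // bridge socket lives next to the real one
	m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr // now a proper URL, hence the rename
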
@@ -587,7 +587,7 @@ type member struct { closed bool } -func (m *member) GRPCAddr() string { return m.grpcAddr } +func (m *member) GRPCURL() string { return m.grpcURL } type memberConfig struct { name string @@ -731,28 +731,28 @@ func memberLogger(t testutil.TB, name string) *zap.Logger { // listenGRPC starts a grpc server over a unix domain socket on the member func (m *member) listenGRPC() error { // prefix with localhost so cert has right domain - m.grpcAddr = "localhost:" + m.Name - m.Logger.Info("LISTEN GRPC", zap.String("m.grpcAddr", m.grpcAddr), zap.String("m.Name", m.Name)) + m.grpcURL = "localhost:" + m.Name + m.Logger.Info("LISTEN GRPC", zap.String("m.grpcURL", m.grpcURL), zap.String("m.Name", m.Name)) if m.useIP { // for IP-only TLS certs - m.grpcAddr = "127.0.0.1:" + m.Name + m.grpcURL = "127.0.0.1:" + m.Name } - grpcListener, err := transport.NewUnixListener(m.grpcAddr) + grpcListener, err := transport.NewUnixListener(m.grpcURL) if err != nil { - return fmt.Errorf("listen failed on grpc socket %s (%v)", m.grpcAddr, err) + return fmt.Errorf("listen failed on grpc socket %s (%v)", m.grpcURL, err) } - bridgeAddr := m.grpcAddr + "0" + bridgeAddr := m.grpcURL + "0" bridgeListener, err := transport.NewUnixListener(bridgeAddr) if err != nil { grpcListener.Close() - return fmt.Errorf("listen failed on bridge socket %s (%v)", m.grpcAddr, err) + return fmt.Errorf("listen failed on bridge socket %s (%v)", m.grpcURL, err) } - m.grpcBridge, err = newBridge(dialer{network: "unix", addr: m.grpcAddr}, bridgeListener) + m.grpcBridge, err = newBridge(dialer{network: "unix", addr: m.grpcURL}, bridgeListener) if err != nil { bridgeListener.Close() grpcListener.Close() return err } - m.grpcAddr = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr + m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr m.grpcListener = grpcListener return nil } @@ -780,12 +780,12 @@ func (m *member) Unblackhole() { m.grpcBridge.Unblackhole() } // NewClientV3 creates a new grpc client connection to the member func NewClientV3(m *member) (*clientv3.Client, error) { - if m.grpcAddr == "" { + if m.grpcURL == "" { return nil, fmt.Errorf("member not configured for grpc") } cfg := clientv3.Config{ - Endpoints: []string{m.grpcAddr}, + Endpoints: []string{m.grpcURL}, DialTimeout: 5 * time.Second, DialOptions: []grpc.DialOption{grpc.WithBlock()}, MaxCallSendMsgSize: m.clientMaxCallSendMsgSize, @@ -847,7 +847,7 @@ func (m *member) Launch() error { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) var err error if m.s, err = etcdserver.NewServer(m.ServerConfig); err != nil { @@ -1004,7 +1004,7 @@ func (m *member) Launch() error { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) return nil } @@ -1117,7 +1117,7 @@ func (m *member) Stop(_ testutil.TB) { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) m.Close() m.serverClosers = nil @@ -1126,7 +1126,7 @@ func (m *member) Stop(_ testutil.TB) { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", 
m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) } @@ -1151,7 +1151,7 @@ func (m *member) Restart(t testutil.TB) error { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) newPeerListeners := make([]net.Listener, 0) for _, ln := range m.PeerListeners { @@ -1176,7 +1176,7 @@ func (m *member) Restart(t testutil.TB) error { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), zap.Error(err), ) return err @@ -1189,7 +1189,7 @@ func (m *member) Terminate(t testutil.TB) { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) m.Close() if !m.keepDataDirTerminate { @@ -1202,7 +1202,7 @@ func (m *member) Terminate(t testutil.TB) { zap.String("name", m.Name), zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), - zap.String("grpc-address", m.grpcAddr), + zap.String("grpc-url", m.grpcURL), ) } diff --git a/tests/integration/proxy/grpcproxy/cluster_test.go b/tests/integration/proxy/grpcproxy/cluster_test.go index 5be35c232..162956444 100644 --- a/tests/integration/proxy/grpcproxy/cluster_test.go +++ b/tests/integration/proxy/grpcproxy/cluster_test.go @@ -36,7 +36,7 @@ func TestClusterProxyMemberList(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) defer clus.Terminate(t) - cts := newClusterProxyServer(zaptest.NewLogger(t), []string{clus.Members[0].GRPCAddr()}, t) + cts := newClusterProxyServer(zaptest.NewLogger(t), []string{clus.Members[0].GRPCURL()}, t) defer cts.close(t) cfg := clientv3.Config{ diff --git a/tests/integration/proxy/grpcproxy/kv_test.go b/tests/integration/proxy/grpcproxy/kv_test.go index 1ff106e4a..4f9ee8d25 100644 --- a/tests/integration/proxy/grpcproxy/kv_test.go +++ b/tests/integration/proxy/grpcproxy/kv_test.go @@ -34,7 +34,7 @@ func TestKVProxyRange(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) defer clus.Terminate(t) - kvts := newKVProxyServer([]string{clus.Members[0].GRPCAddr()}, t) + kvts := newKVProxyServer([]string{clus.Members[0].GRPCURL()}, t) defer kvts.close() // create a client and try to get key from proxy. 
diff --git a/tests/integration/proxy/grpcproxy/register_test.go b/tests/integration/proxy/grpcproxy/register_test.go index 4fbe08e08..d57d01a87 100644 --- a/tests/integration/proxy/grpcproxy/register_test.go +++ b/tests/integration/proxy/grpcproxy/register_test.go @@ -31,7 +31,7 @@ func TestRegister(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) defer clus.Terminate(t) cli := clus.Client(0) - paddr := clus.Members[0].GRPCAddr() + paddr := clus.Members[0].GRPCURL() testPrefix := "test-name" wa := mustCreateWatcher(t, cli, testPrefix) diff --git a/tests/integration/v3_grpc_test.go b/tests/integration/v3_grpc_test.go index 298ee9428..e54ba26df 100644 --- a/tests/integration/v3_grpc_test.go +++ b/tests/integration/v3_grpc_test.go @@ -1784,7 +1784,7 @@ func testTLSReload( } cli, cerr := NewClient(t, clientv3.Config{ DialOptions: []grpc.DialOption{grpc.WithBlock()}, - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialTimeout: time.Second, TLS: cc, }) @@ -1818,7 +1818,7 @@ func testTLSReload( t.Fatal(terr) } cl, cerr := NewClient(t, clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialTimeout: 5 * time.Second, TLS: tls, }) diff --git a/tests/integration/v3_tls_test.go b/tests/integration/v3_tls_test.go index 4d77bee13..2437df94e 100644 --- a/tests/integration/v3_tls_test.go +++ b/tests/integration/v3_tls_test.go @@ -55,7 +55,7 @@ func testTLSCipherSuites(t *testing.T, valid bool) { t.Fatal(err) } cli, cerr := NewClient(t, clientv3.Config{ - Endpoints: []string{clus.Members[0].GRPCAddr()}, + Endpoints: []string{clus.Members[0].GRPCURL()}, DialTimeout: time.Second, DialOptions: []grpc.DialOption{grpc.WithBlock()}, TLS: cc, From 451eb5d711f1a7ba1f41f32db738ab6d3e301dd8 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 24 Sep 2021 13:55:34 +0200 Subject: [PATCH 29/43] tests: Make using bridge optional --- tests/integration/bridge.go | 8 +-- .../clientv3/connectivity/black_hole_test.go | 2 + .../connectivity/server_shutdown_test.go | 2 + tests/integration/clientv3/kv_test.go | 6 +- .../integration/clientv3/lease/lease_test.go | 16 ++--- .../clientv3/lease/leasing_test.go | 40 ++++++------ .../integration/clientv3/maintenance_test.go | 2 +- .../integration/clientv3/ordering_kv_test.go | 4 +- .../clientv3/ordering_util_test.go | 2 +- tests/integration/clientv3/txn_test.go | 4 +- tests/integration/clientv3/watch_test.go | 12 ++-- tests/integration/cluster.go | 65 +++++++++++++------ tests/integration/cluster_test.go | 14 ++-- tests/integration/member_test.go | 4 +- tests/integration/v3_alarm_test.go | 4 +- tests/integration/v3_grpc_inflight_test.go | 2 +- tests/integration/v3_grpc_test.go | 8 +-- tests/integration/v3_lease_test.go | 11 ++-- tests/integration/v3_watch_test.go | 2 +- 19 files changed, 119 insertions(+), 89 deletions(-) diff --git a/tests/integration/bridge.go b/tests/integration/bridge.go index 22040b882..67d6ae447 100644 --- a/tests/integration/bridge.go +++ b/tests/integration/bridge.go @@ -29,8 +29,8 @@ type Dialer interface { // to disconnect grpc network connections without closing the logical grpc connection. 
type bridge struct { dialer Dialer - l net.Listener - conns map[*bridgeConn]struct{} + l net.Listener + conns map[*bridgeConn]struct{} stopc chan struct{} pausec chan struct{} @@ -43,8 +43,8 @@ type bridge struct { func newBridge(dialer Dialer, listener net.Listener) (*bridge, error) { b := &bridge{ // bridge "port" is ("%05d%05d0", port, pid) since go1.8 expects the port to be a number - dialer: dialer, - l: listener, + dialer: dialer, + l: listener, conns: make(map[*bridgeConn]struct{}), stopc: make(chan struct{}), pausec: make(chan struct{}), diff --git a/tests/integration/clientv3/connectivity/black_hole_test.go b/tests/integration/clientv3/connectivity/black_hole_test.go index a4415322f..c9a199558 100644 --- a/tests/integration/clientv3/connectivity/black_hole_test.go +++ b/tests/integration/clientv3/connectivity/black_hole_test.go @@ -38,6 +38,7 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{ Size: 2, GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings + UseBridge: true, }) defer clus.Terminate(t) @@ -170,6 +171,7 @@ func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Clien clus := integration.NewClusterV3(t, &integration.ClusterConfig{ Size: 2, SkipCreatingClient: true, + UseBridge: true, }) defer clus.Terminate(t) diff --git a/tests/integration/clientv3/connectivity/server_shutdown_test.go b/tests/integration/clientv3/connectivity/server_shutdown_test.go index e7660852b..5b888e6fe 100644 --- a/tests/integration/clientv3/connectivity/server_shutdown_test.go +++ b/tests/integration/clientv3/connectivity/server_shutdown_test.go @@ -35,6 +35,7 @@ func TestBalancerUnderServerShutdownWatch(t *testing.T) { clus := integration.NewClusterV3(t, &integration.ClusterConfig{ Size: 3, SkipCreatingClient: true, + UseBridge: true, }) defer clus.Terminate(t) @@ -278,6 +279,7 @@ func testBalancerUnderServerStopInflightRangeOnRestart(t *testing.T, linearizabl cfg := &integration.ClusterConfig{ Size: 2, SkipCreatingClient: true, + UseBridge: true, } if linearizable { cfg.Size = 3 diff --git a/tests/integration/clientv3/kv_test.go b/tests/integration/clientv3/kv_test.go index ac694aa21..b45240225 100644 --- a/tests/integration/clientv3/kv_test.go +++ b/tests/integration/clientv3/kv_test.go @@ -712,7 +712,7 @@ func TestKVGetRetry(t *testing.T) { integration.BeforeTest(t) clusterSize := 3 - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: clusterSize}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: clusterSize, UseBridge: true}) defer clus.Terminate(t) // because killing leader and following election @@ -765,7 +765,7 @@ func TestKVGetRetry(t *testing.T) { func TestKVPutFailGetRetry(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) kv := clus.Client(0) @@ -876,7 +876,7 @@ func TestKVPutStoppedServerAndClose(t *testing.T) { // in the presence of network errors. 
func TestKVPutAtMostOnce(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) if _, err := clus.Client(0).Put(context.TODO(), "k", "1"); err != nil { diff --git a/tests/integration/clientv3/lease/lease_test.go b/tests/integration/clientv3/lease/lease_test.go index 326289949..6a6cf7dd3 100644 --- a/tests/integration/clientv3/lease/lease_test.go +++ b/tests/integration/clientv3/lease/lease_test.go @@ -190,7 +190,7 @@ func TestLeaseKeepAliveHandleFailure(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) // TODO: change this line to get a cluster client @@ -416,7 +416,7 @@ func TestLeaseRevokeNewAfterClose(t *testing.T) { func TestLeaseKeepAliveCloseAfterDisconnectRevoke(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) @@ -462,7 +462,7 @@ func TestLeaseKeepAliveCloseAfterDisconnectRevoke(t *testing.T) { func TestLeaseKeepAliveInitTimeout(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) @@ -495,7 +495,7 @@ func TestLeaseKeepAliveInitTimeout(t *testing.T) { func TestLeaseKeepAliveTTLTimeout(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) @@ -530,7 +530,7 @@ func TestLeaseKeepAliveTTLTimeout(t *testing.T) { func TestLeaseTimeToLive(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) c := clus.RandClient() @@ -656,7 +656,7 @@ func TestLeaseLeases(t *testing.T) { func TestLeaseRenewLostQuorum(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) @@ -728,7 +728,7 @@ func TestLeaseKeepAliveLoopExit(t *testing.T) { // transient cluster failure. 
func TestV3LeaseFailureOverlap(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2, UseBridge: true}) defer clus.Terminate(t) numReqs := 5 @@ -782,7 +782,7 @@ func TestV3LeaseFailureOverlap(t *testing.T) { func TestLeaseWithRequireLeader(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2, UseBridge: true}) defer clus.Terminate(t) c := clus.Client(0) diff --git a/tests/integration/clientv3/lease/leasing_test.go b/tests/integration/clientv3/lease/leasing_test.go index 54236be97..aea6b2234 100644 --- a/tests/integration/clientv3/lease/leasing_test.go +++ b/tests/integration/clientv3/lease/leasing_test.go @@ -195,7 +195,7 @@ func TestLeasingPutInvalidateExisting(t *testing.T) { // TestLeasingGetNoLeaseTTL checks a key with a TTL is not leased. func TestLeasingGetNoLeaseTTL(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -224,7 +224,7 @@ func TestLeasingGetNoLeaseTTL(t *testing.T) { // when the etcd cluster is partitioned. func TestLeasingGetSerializable(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -326,7 +326,7 @@ func TestLeasingRevGet(t *testing.T) { // TestLeasingGetWithOpts checks options that can be served through the cache do not depend on the server. 
func TestLeasingGetWithOpts(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -418,7 +418,7 @@ func TestLeasingConcurrentPut(t *testing.T) { func TestLeasingDisconnectedGet(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -550,7 +550,7 @@ func TestLeasingOverwriteResponse(t *testing.T) { func TestLeasingOwnerPutResponse(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -617,7 +617,7 @@ func TestLeasingTxnOwnerGetRange(t *testing.T) { func TestLeasingTxnOwnerGet(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) client := clus.Client(0) @@ -773,7 +773,7 @@ func TestLeasingTxnOwnerDelete(t *testing.T) { func TestLeasingTxnOwnerIf(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -867,7 +867,7 @@ func TestLeasingTxnOwnerIf(t *testing.T) { func TestLeasingTxnCancel(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv1, closeLKV1, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -1085,7 +1085,7 @@ func TestLeasingTxnRandIfThenOrElse(t *testing.T) { func TestLeasingOwnerPutError(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -1106,7 +1106,7 @@ func TestLeasingOwnerPutError(t *testing.T) { func TestLeasingOwnerDeleteError(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -1127,7 +1127,7 @@ func TestLeasingOwnerDeleteError(t *testing.T) { func TestLeasingNonOwnerPutError(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "pfx/") @@ -1201,7 +1201,7 @@ func testLeasingOwnerDelete(t *testing.T, del clientv3.Op) { func TestLeasingDeleteRangeBounds(t *testing.T) { 
integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) delkv, closeDelKV, err := leasing.NewKV(clus.Client(0), "0/") @@ -1376,7 +1376,7 @@ func TestLeasingPutGetDeleteConcurrent(t *testing.T) { // disconnected when trying to submit revoke txn. func TestLeasingReconnectOwnerRevoke(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv1, closeLKV1, err1 := leasing.NewKV(clus.Client(0), "foo/") @@ -1437,7 +1437,7 @@ func TestLeasingReconnectOwnerRevoke(t *testing.T) { // disconnected and the watch is compacted. func TestLeasingReconnectOwnerRevokeCompact(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv1, closeLKV1, err1 := leasing.NewKV(clus.Client(0), "foo/") @@ -1490,7 +1490,7 @@ func TestLeasingReconnectOwnerRevokeCompact(t *testing.T) { // not cause inconsistency between the server and the client. func TestLeasingReconnectOwnerConsistency(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/") @@ -1650,7 +1650,7 @@ func TestLeasingTxnAtomicCache(t *testing.T) { // TestLeasingReconnectTxn checks that Txn is resilient to disconnects. func TestLeasingReconnectTxn(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/") @@ -1686,7 +1686,7 @@ func TestLeasingReconnectTxn(t *testing.T) { // not cause inconsistency between the server and the client. 
func TestLeasingReconnectNonOwnerGet(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/") @@ -1814,7 +1814,7 @@ func TestLeasingDo(t *testing.T) { func TestLeasingTxnOwnerPutBranch(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/") @@ -1908,7 +1908,7 @@ func randCmps(pfx string, dat []*clientv3.PutResponse) (cmps []clientv3.Cmp, the func TestLeasingSessionExpire(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/", concurrency.WithTTL(1)) @@ -1984,7 +1984,7 @@ func TestLeasingSessionExpireCancel(t *testing.T) { for i := range tests { t.Run(fmt.Sprintf("test %d", i), func(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) lkv, closeLKV, err := leasing.NewKV(clus.Client(0), "foo/", concurrency.WithTTL(1)) diff --git a/tests/integration/clientv3/maintenance_test.go b/tests/integration/clientv3/maintenance_test.go index 965599583..e48a4a4fa 100644 --- a/tests/integration/clientv3/maintenance_test.go +++ b/tests/integration/clientv3/maintenance_test.go @@ -192,7 +192,7 @@ func TestMaintenanceSnapshotErrorInflight(t *testing.T) { func testMaintenanceSnapshotErrorInflight(t *testing.T, snapshot func(context.Context, *clientv3.Client) (io.ReadCloser, error)) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) // take about 1-second to read snapshot diff --git a/tests/integration/clientv3/ordering_kv_test.go b/tests/integration/clientv3/ordering_kv_test.go index fdce92495..b6b3ce71f 100644 --- a/tests/integration/clientv3/ordering_kv_test.go +++ b/tests/integration/clientv3/ordering_kv_test.go @@ -30,7 +30,7 @@ func TestDetectKvOrderViolation(t *testing.T) { var errOrderViolation = errors.New("DetectedOrderViolation") integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) cfg := clientv3.Config{ @@ -97,7 +97,7 @@ func TestDetectTxnOrderViolation(t *testing.T) { var errOrderViolation = errors.New("DetectedOrderViolation") integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) cfg := clientv3.Config{ diff --git a/tests/integration/clientv3/ordering_util_test.go b/tests/integration/clientv3/ordering_util_test.go index 85c61f407..a4b65ec38 100644 --- a/tests/integration/clientv3/ordering_util_test.go +++ 
b/tests/integration/clientv3/ordering_util_test.go @@ -80,7 +80,7 @@ func TestEndpointSwitchResolvesViolation(t *testing.T) { func TestUnresolvableOrderViolation(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 5, SkipCreatingClient: true}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 5, SkipCreatingClient: true, UseBridge: true}) defer clus.Terminate(t) cfg := clientv3.Config{ Endpoints: []string{ diff --git a/tests/integration/clientv3/txn_test.go b/tests/integration/clientv3/txn_test.go index ffe93e096..679b9868f 100644 --- a/tests/integration/clientv3/txn_test.go +++ b/tests/integration/clientv3/txn_test.go @@ -53,7 +53,7 @@ func TestTxnError(t *testing.T) { func TestTxnWriteFail(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) kv := clus.Client(0) @@ -103,7 +103,7 @@ func TestTxnReadRetry(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) kv := clus.Client(0) diff --git a/tests/integration/clientv3/watch_test.go b/tests/integration/clientv3/watch_test.go index 2fea3c9ba..b5a0dd08f 100644 --- a/tests/integration/clientv3/watch_test.go +++ b/tests/integration/clientv3/watch_test.go @@ -47,7 +47,7 @@ type watchctx struct { func runWatchTest(t *testing.T, f watcherTest) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) wclientMember := rand.Intn(3) @@ -348,7 +348,7 @@ func putAndWatch(t *testing.T, wctx *watchctx, key, val string) { func TestWatchResumeInitRev(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) @@ -404,7 +404,7 @@ func TestWatchResumeInitRev(t *testing.T) { func TestWatchResumeCompacted(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) // create a waiting watcher at rev 1 @@ -955,7 +955,7 @@ func TestWatchWithCreatedNotification(t *testing.T) { func TestWatchWithCreatedNotificationDropConn(t *testing.T) { integration.BeforeTest(t) - cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer cluster.Terminate(t) client := cluster.RandClient() @@ -1063,7 +1063,7 @@ func TestWatchOverlapDropConnContextCancel(t *testing.T) { func testWatchOverlapContextCancel(t *testing.T, f func(*integration.ClusterV3)) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) n := 100 @@ -1154,7 +1154,7 @@ func TestWatchCancelAndCloseClient(t *testing.T) { // then closes the watcher interface to ensure correct clean up. 
func TestWatchStressResumeClose(t *testing.T) { integration.BeforeTest(t) - clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.Client(0) diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index c854030a6..1261bfbe4 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -153,6 +153,9 @@ type ClusterConfig struct { // UseIP is true to use only IP for gRPC requests. UseIP bool + // UseBridge adds bridge between client and grpc server. Should be used in tests that + // want to manipulate connection or require connection not breaking despite server stop/restart. + UseBridge bool EnableLeaseCheckpoint bool LeaseCheckpointInterval time.Duration @@ -313,6 +316,7 @@ func (c *cluster) mustNewMember(t testutil.TB) *member { clientMaxCallSendMsgSize: c.cfg.ClientMaxCallSendMsgSize, clientMaxCallRecvMsgSize: c.cfg.ClientMaxCallRecvMsgSize, useIP: c.cfg.UseIP, + useBridge: c.cfg.UseBridge, enableLeaseCheckpoint: c.cfg.EnableLeaseCheckpoint, leaseCheckpointInterval: c.cfg.LeaseCheckpointInterval, WatchProgressNotifyInterval: c.cfg.WatchProgressNotifyInterval, @@ -582,6 +586,7 @@ type member struct { clientMaxCallSendMsgSize int clientMaxCallRecvMsgSize int useIP bool + useBridge bool isLearner bool closed bool @@ -605,6 +610,7 @@ type memberConfig struct { clientMaxCallSendMsgSize int clientMaxCallRecvMsgSize int useIP bool + useBridge bool enableLeaseCheckpoint bool leaseCheckpointInterval time.Duration WatchProgressNotifyInterval time.Duration @@ -698,6 +704,7 @@ func mustNewMember(t testutil.TB, mcfg memberConfig) *member { m.clientMaxCallSendMsgSize = mcfg.clientMaxCallSendMsgSize m.clientMaxCallRecvMsgSize = mcfg.clientMaxCallRecvMsgSize m.useIP = mcfg.useIP + m.useBridge = mcfg.useBridge m.EnableLeaseCheckpoint = mcfg.enableLeaseCheckpoint m.LeaseCheckpointInterval = mcfg.leaseCheckpointInterval @@ -731,35 +738,53 @@ func memberLogger(t testutil.TB, name string) *zap.Logger { // listenGRPC starts a grpc server over a unix domain socket on the member func (m *member) listenGRPC() error { // prefix with localhost so cert has right domain - m.grpcURL = "localhost:" + m.Name - m.Logger.Info("LISTEN GRPC", zap.String("m.grpcURL", m.grpcURL), zap.String("m.Name", m.Name)) - if m.useIP { // for IP-only TLS certs - m.grpcURL = "127.0.0.1:" + m.Name - } - grpcListener, err := transport.NewUnixListener(m.grpcURL) + grpcAddr := m.grpcAddr() + m.Logger.Info("LISTEN GRPC", zap.String("grpcAddr", grpcAddr), zap.String("m.Name", m.Name)) + grpcListener, err := transport.NewUnixListener(grpcAddr) if err != nil { - return fmt.Errorf("listen failed on grpc socket %s (%v)", m.grpcURL, err) + return fmt.Errorf("listen failed on grpc socket %s (%v)", grpcAddr, err) } - bridgeAddr := m.grpcURL + "0" - bridgeListener, err := transport.NewUnixListener(bridgeAddr) - if err != nil { - grpcListener.Close() - return fmt.Errorf("listen failed on bridge socket %s (%v)", m.grpcURL, err) + m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + grpcAddr + if m.useBridge { + _, err = m.addBridge() + if err != nil { + grpcListener.Close() + return err + } } - m.grpcBridge, err = newBridge(dialer{network: "unix", addr: m.grpcURL}, bridgeListener) - if err != nil { - bridgeListener.Close() - grpcListener.Close() - return err - } - m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr m.grpcListener = grpcListener return nil } 
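// Usage sketch (illustrative, based on the ClusterConfig option added in this
// patch): the bridge is now opt-in, so only tests that need to manipulate
// client connections pay for the extra unix socket and forwarding goroutines.
//
//	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, UseBridge: true})
//	defer clus.Terminate(t)
//	clus.Members[0].DropConnections() // drops connections going through the bridge (requires UseBridge)
//
// Without UseBridge, grpcURL points straight at the member's grpc socket and
// no bridge is created.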
+func (m *member) addBridge() (*bridge, error) { + grpcAddr := m.grpcAddr() + bridgeAddr := grpcAddr + "0" + m.Logger.Info("LISTEN BRIDGE", zap.String("grpc-address", bridgeAddr), zap.String("member", m.Name)) + bridgeListener, err := transport.NewUnixListener(bridgeAddr) + if err != nil { + return nil, fmt.Errorf("listen failed on bridge socket %s (%v)", grpcAddr, err) + } + m.grpcBridge, err = newBridge(dialer{network: "unix", addr: grpcAddr}, bridgeListener) + if err != nil { + bridgeListener.Close() + return nil, err + } + m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr + return m.grpcBridge, nil +} + +func (m *member) grpcAddr() string { + // prefix with localhost so cert has right domain + addr := "localhost:" + m.Name + if m.useIP { // for IP-only TLS certs + addr = "127.0.0.1:" + m.Name + } + return addr +} + type dialer struct { network string - addr string + addr string } func (d dialer) Dial() (net.Conn, error) { diff --git a/tests/integration/cluster_test.go b/tests/integration/cluster_test.go index e25d77f21..2fb5a18d9 100644 --- a/tests/integration/cluster_test.go +++ b/tests/integration/cluster_test.go @@ -173,7 +173,7 @@ func testDecreaseClusterSize(t *testing.T, size int) { } func TestForceNewCluster(t *testing.T) { - c := NewCluster(t, 3) + c := newCluster(t, &ClusterConfig{Size: 3, UseBridge: true}) c.Launch(t) cc := MustNewHTTPClient(t, []string{c.Members[0].URL()}, nil) kapi := client.NewKeysAPI(cc) @@ -283,7 +283,7 @@ func testIssue2746(t *testing.T, members int) { func TestIssue2904(t *testing.T) { BeforeTest(t) // start 1-member cluster to ensure member 0 is the leader of the cluster. - c := NewCluster(t, 1) + c := newCluster(t, &ClusterConfig{Size: 1, UseBridge: true}) c.Launch(t) defer c.Terminate(t) @@ -319,7 +319,7 @@ func TestIssue2904(t *testing.T) { func TestIssue3699(t *testing.T) { // start a cluster of 3 nodes a, b, c BeforeTest(t) - c := NewCluster(t, 3) + c := newCluster(t, &ClusterConfig{Size: 3, UseBridge: true}) c.Launch(t) defer c.Terminate(t) @@ -371,7 +371,7 @@ func TestIssue3699(t *testing.T) { // TestRejectUnhealthyAdd ensures an unhealthy cluster rejects adding members. func TestRejectUnhealthyAdd(t *testing.T) { BeforeTest(t) - c := NewCluster(t, 3) + c := newCluster(t, &ClusterConfig{Size: 3, UseBridge: true}) for _, m := range c.Members { m.ServerConfig.StrictReconfigCheck = true } @@ -415,7 +415,7 @@ func TestRejectUnhealthyAdd(t *testing.T) { // if quorum will be lost. func TestRejectUnhealthyRemove(t *testing.T) { BeforeTest(t) - c := NewCluster(t, 5) + c := newCluster(t, &ClusterConfig{Size: 5, UseBridge: true}) for _, m := range c.Members { m.ServerConfig.StrictReconfigCheck = true } @@ -464,7 +464,7 @@ func TestRestartRemoved(t *testing.T) { BeforeTest(t) // 1. 
start single-member cluster - c := NewCluster(t, 1) + c := newCluster(t, &ClusterConfig{Size: 1, UseBridge: true}) for _, m := range c.Members { m.ServerConfig.StrictReconfigCheck = true } @@ -540,7 +540,7 @@ func clusterMustProgress(t *testing.T, membs []*member) { func TestSpeedyTerminate(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 3}) + clus := NewClusterV3(t, &ClusterConfig{Size: 3, UseBridge: true}) // Stop/Restart so requests will time out on lost leaders for i := 0; i < 3; i++ { clus.Members[i].Stop(t) diff --git a/tests/integration/member_test.go b/tests/integration/member_test.go index 5493924c9..99788b757 100644 --- a/tests/integration/member_test.go +++ b/tests/integration/member_test.go @@ -46,7 +46,7 @@ func TestPauseMember(t *testing.T) { func TestRestartMember(t *testing.T) { BeforeTest(t) - c := NewCluster(t, 3) + c := newCluster(t, &ClusterConfig{Size: 3, UseBridge: true}) c.Launch(t) defer c.Terminate(t) @@ -88,7 +88,7 @@ func TestLaunchDuplicateMemberShouldFail(t *testing.T) { func TestSnapshotAndRestartMember(t *testing.T) { BeforeTest(t) - m := mustNewMember(t, memberConfig{name: "snapAndRestartTest"}) + m := mustNewMember(t, memberConfig{name: "snapAndRestartTest", useBridge: true}) m.SnapshotCount = 100 m.Launch() defer m.Terminate(t) diff --git a/tests/integration/v3_alarm_test.go b/tests/integration/v3_alarm_test.go index 55f0366cb..dc2191253 100644 --- a/tests/integration/v3_alarm_test.go +++ b/tests/integration/v3_alarm_test.go @@ -35,7 +35,7 @@ func TestV3StorageQuotaApply(t *testing.T) { BeforeTest(t) quotasize := int64(16 * os.Getpagesize()) - clus := NewClusterV3(t, &ClusterConfig{Size: 2}) + clus := NewClusterV3(t, &ClusterConfig{Size: 2, UseBridge: true}) defer clus.Terminate(t) kvc0 := toGRPC(clus.Client(0)).KV kvc1 := toGRPC(clus.Client(1)).KV @@ -147,7 +147,7 @@ func TestV3AlarmDeactivate(t *testing.T) { func TestV3CorruptAlarm(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 3}) + clus := NewClusterV3(t, &ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) var wg sync.WaitGroup diff --git a/tests/integration/v3_grpc_inflight_test.go b/tests/integration/v3_grpc_inflight_test.go index 9f5085112..7432fb46a 100644 --- a/tests/integration/v3_grpc_inflight_test.go +++ b/tests/integration/v3_grpc_inflight_test.go @@ -61,7 +61,7 @@ func TestV3MaintenanceDefragmentInflightRange(t *testing.T) { // See https://github.com/etcd-io/etcd/issues/7322 for more detail. func TestV3KVInflightRangeRequests(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.RandClient() diff --git a/tests/integration/v3_grpc_test.go b/tests/integration/v3_grpc_test.go index e54ba26df..cbedafbe3 100644 --- a/tests/integration/v3_grpc_test.go +++ b/tests/integration/v3_grpc_test.go @@ -88,7 +88,7 @@ func TestV3PutOverwrite(t *testing.T) { // TestPutRestart checks if a put after an unrelated member restart succeeds func TestV3PutRestart(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 3}) + clus := NewClusterV3(t, &ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) kvIdx := rand.Intn(3) @@ -1210,7 +1210,7 @@ func TestV3Hash(t *testing.T) { // TestV3HashRestart ensures that hash stays the same after restart. 
func TestV3HashRestart(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) cli := clus.RandClient() @@ -1243,7 +1243,7 @@ func TestV3StorageQuotaAPI(t *testing.T) { BeforeTest(t) quotasize := int64(16 * os.Getpagesize()) - clus := NewClusterV3(t, &ClusterConfig{Size: 3}) + clus := NewClusterV3(t, &ClusterConfig{Size: 3, UseBridge: true}) // Set a quota on one node clus.Members[0].QuotaBackendBytes = quotasize @@ -1858,7 +1858,7 @@ func TestGRPCRequireLeader(t *testing.T) { func TestGRPCStreamRequireLeader(t *testing.T) { BeforeTest(t) - cfg := ClusterConfig{Size: 3} + cfg := ClusterConfig{Size: 3, UseBridge: true} clus := newClusterV3NoClients(t, &cfg) defer clus.Terminate(t) diff --git a/tests/integration/v3_lease_test.go b/tests/integration/v3_lease_test.go index 08b0ca7bb..1727da65c 100644 --- a/tests/integration/v3_lease_test.go +++ b/tests/integration/v3_lease_test.go @@ -36,7 +36,7 @@ import ( func TestV3LeasePromote(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 3}) + clus := NewClusterV3(t, &ClusterConfig{Size: 3, UseBridge: true}) defer clus.Terminate(t) // create lease @@ -237,6 +237,7 @@ func TestV3LeaseCheckpoint(t *testing.T) { Size: 3, EnableLeaseCheckpoint: true, LeaseCheckpointInterval: leaseInterval, + UseBridge: true, }) defer clus.Terminate(t) @@ -649,7 +650,7 @@ const fiveMinTTL int64 = 300 func TestV3LeaseRecoverAndRevoke(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) kvc := toGRPC(clus.Client(0)).KV @@ -700,7 +701,7 @@ func TestV3LeaseRecoverAndRevoke(t *testing.T) { func TestV3LeaseRevokeAndRecover(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) kvc := toGRPC(clus.Client(0)).KV @@ -752,7 +753,7 @@ func TestV3LeaseRevokeAndRecover(t *testing.T) { func TestV3LeaseRecoverKeyWithDetachedLease(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) kvc := toGRPC(clus.Client(0)).KV @@ -808,7 +809,7 @@ func TestV3LeaseRecoverKeyWithDetachedLease(t *testing.T) { func TestV3LeaseRecoverKeyWithMutipleLease(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) kvc := toGRPC(clus.Client(0)).KV diff --git a/tests/integration/v3_watch_test.go b/tests/integration/v3_watch_test.go index b2a31cc2f..fb2f510a6 100644 --- a/tests/integration/v3_watch_test.go +++ b/tests/integration/v3_watch_test.go @@ -1034,7 +1034,7 @@ func TestWatchWithProgressNotify(t *testing.T) { // TestV3WatcMultiOpenhClose opens many watchers concurrently on multiple streams. 
func TestV3WatchClose(t *testing.T) { BeforeTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 1}) + clus := NewClusterV3(t, &ClusterConfig{Size: 1, UseBridge: true}) defer clus.Terminate(t) c := clus.Client(0) From 536475818101fe69f9dd5f4506c8d935c58d0548 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 24 Sep 2021 14:06:20 +0200 Subject: [PATCH 30/43] tests: Cleanup member interface by exposing Bridge directly --- tests/integration/bridge.go | 6 +++--- .../clientv3/connectivity/black_hole_test.go | 8 ++++---- tests/integration/clientv3/kv_test.go | 4 ++-- .../integration/clientv3/lease/leasing_test.go | 12 ++++++------ tests/integration/clientv3/watch_test.go | 18 +++++++++--------- tests/integration/cluster.go | 13 +++++++------ tests/integration/v3_watch_test.go | 2 +- 7 files changed, 32 insertions(+), 31 deletions(-) diff --git a/tests/integration/bridge.go b/tests/integration/bridge.go index 67d6ae447..746168fc7 100644 --- a/tests/integration/bridge.go +++ b/tests/integration/bridge.go @@ -68,7 +68,7 @@ func (b *bridge) Close() { b.wg.Wait() } -func (b *bridge) Reset() { +func (b *bridge) DropConnections() { b.mu.Lock() defer b.mu.Unlock() for bc := range b.conns { @@ -77,13 +77,13 @@ func (b *bridge) Reset() { b.conns = make(map[*bridgeConn]struct{}) } -func (b *bridge) Pause() { +func (b *bridge) PauseConnections() { b.mu.Lock() b.pausec = make(chan struct{}) b.mu.Unlock() } -func (b *bridge) Unpause() { +func (b *bridge) UnpauseConnections() { b.mu.Lock() select { case <-b.pausec: diff --git a/tests/integration/clientv3/connectivity/black_hole_test.go b/tests/integration/clientv3/connectivity/black_hole_test.go index c9a199558..4445c69f6 100644 --- a/tests/integration/clientv3/connectivity/black_hole_test.go +++ b/tests/integration/clientv3/connectivity/black_hole_test.go @@ -77,7 +77,7 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) { // give enough time for balancer resolution time.Sleep(5 * time.Second) - clus.Members[0].Blackhole() + clus.Members[0].Bridge().Blackhole() if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil { t.Fatal(err) @@ -88,12 +88,12 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) { t.Error("took too long to receive watch events") } - clus.Members[0].Unblackhole() + clus.Members[0].Bridge().Unblackhole() // waiting for moving eps[0] out of unhealthy, so that it can be re-pined. time.Sleep(ccfg.DialTimeout) - clus.Members[1].Blackhole() + clus.Members[1].Bridge().Blackhole() // make sure client[0] can connect to eps[0] after remove the blackhole. if _, err = clus.Client(0).Get(context.TODO(), "foo"); err != nil { @@ -196,7 +196,7 @@ func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Clien cli.SetEndpoints(eps...) // blackhole eps[0] - clus.Members[0].Blackhole() + clus.Members[0].Bridge().Blackhole() // With round robin balancer, client will make a request to a healthy endpoint // within a few requests. 
diff --git a/tests/integration/clientv3/kv_test.go b/tests/integration/clientv3/kv_test.go index b45240225..8dd98466d 100644 --- a/tests/integration/clientv3/kv_test.go +++ b/tests/integration/clientv3/kv_test.go @@ -884,12 +884,12 @@ func TestKVPutAtMostOnce(t *testing.T) { } for i := 0; i < 10; i++ { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() donec := make(chan struct{}) go func() { defer close(donec) for i := 0; i < 10; i++ { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() time.Sleep(5 * time.Millisecond) } }() diff --git a/tests/integration/clientv3/lease/leasing_test.go b/tests/integration/clientv3/lease/leasing_test.go index aea6b2234..3e935d8e3 100644 --- a/tests/integration/clientv3/lease/leasing_test.go +++ b/tests/integration/clientv3/lease/leasing_test.go @@ -1510,11 +1510,11 @@ func TestLeasingReconnectOwnerConsistency(t *testing.T) { for i := 0; i < 10; i++ { v := fmt.Sprintf("%d", i) donec := make(chan struct{}) - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() go func() { defer close(donec) for i := 0; i < 20; i++ { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() time.Sleep(time.Millisecond) } }() @@ -1664,9 +1664,9 @@ func TestLeasingReconnectTxn(t *testing.T) { donec := make(chan struct{}) go func() { defer close(donec) - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() for i := 0; i < 10; i++ { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() time.Sleep(time.Millisecond) } time.Sleep(10 * time.Millisecond) @@ -1704,11 +1704,11 @@ func TestLeasingReconnectNonOwnerGet(t *testing.T) { n := 0 for i := 0; i < 10; i++ { donec := make(chan struct{}) - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() go func() { defer close(donec) for j := 0; j < 10; j++ { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() time.Sleep(time.Millisecond) } }() diff --git a/tests/integration/clientv3/watch_test.go b/tests/integration/clientv3/watch_test.go index b5a0dd08f..7a992ecf9 100644 --- a/tests/integration/clientv3/watch_test.go +++ b/tests/integration/clientv3/watch_test.go @@ -188,7 +188,7 @@ func testWatchReconnRequest(t *testing.T, wctx *watchctx) { defer close(donec) // take down watcher connection for { - wctx.clus.Members[wctx.wclientMember].DropConnections() + wctx.clus.Members[wctx.wclientMember].Bridge().DropConnections() select { case <-timer: // spinning on close may live lock reconnection @@ -230,7 +230,7 @@ func testWatchReconnInit(t *testing.T, wctx *watchctx) { if wctx.ch = wctx.w.Watch(context.TODO(), "a"); wctx.ch == nil { t.Fatalf("expected non-nil channel") } - wctx.clus.Members[wctx.wclientMember].DropConnections() + wctx.clus.Members[wctx.wclientMember].Bridge().DropConnections() // watcher should recover putAndWatch(t, wctx, "a", "a") } @@ -247,7 +247,7 @@ func testWatchReconnRunning(t *testing.T, wctx *watchctx) { } putAndWatch(t, wctx, "a", "a") // take down watcher connection - wctx.clus.Members[wctx.wclientMember].DropConnections() + wctx.clus.Members[wctx.wclientMember].Bridge().DropConnections() // watcher should recover putAndWatch(t, wctx, "a", "b") } @@ -368,8 +368,8 @@ func TestWatchResumeInitRev(t *testing.T) { t.Fatalf("got (%v, %v), expected create notification rev=4", resp, ok) } // pause wch - clus.Members[0].DropConnections() - clus.Members[0].PauseConnections() + 
clus.Members[0].Bridge().DropConnections() + clus.Members[0].Bridge().PauseConnections() select { case resp, ok := <-wch: @@ -378,7 +378,7 @@ func TestWatchResumeInitRev(t *testing.T) { } // resume wch - clus.Members[0].UnpauseConnections() + clus.Members[0].Bridge().UnpauseConnections() select { case resp, ok := <-wch: @@ -968,7 +968,7 @@ func TestWatchWithCreatedNotificationDropConn(t *testing.T) { t.Fatalf("expected created event, got %v", resp) } - cluster.Members[0].DropConnections() + cluster.Members[0].Bridge().DropConnections() // check watch channel doesn't post another watch response. select { @@ -1056,7 +1056,7 @@ func TestWatchOverlapContextCancel(t *testing.T) { func TestWatchOverlapDropConnContextCancel(t *testing.T) { f := func(clus *integration.ClusterV3) { - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() } testWatchOverlapContextCancel(t, f) } @@ -1164,7 +1164,7 @@ func TestWatchStressResumeClose(t *testing.T) { for i := range wchs { wchs[i] = cli.Watch(ctx, "abc") } - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() cancel() if err := cli.Close(); err != nil { t.Fatal(err) diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index 1261bfbe4..86f3e4593 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -773,6 +773,13 @@ func (m *member) addBridge() (*bridge, error) { return m.grpcBridge, nil } +func (m *member) Bridge() *bridge { + if !m.useBridge { + m.Logger.Panic("Bridge not available. Please configure using bridge before creating cluster.") + } + return m.grpcBridge +} + func (m *member) grpcAddr() string { // prefix with localhost so cert has right domain addr := "localhost:" + m.Name @@ -797,12 +804,6 @@ func (m *member) ElectionTimeout() time.Duration { func (m *member) ID() types.ID { return m.s.ID() } -func (m *member) DropConnections() { m.grpcBridge.Reset() } -func (m *member) PauseConnections() { m.grpcBridge.Pause() } -func (m *member) UnpauseConnections() { m.grpcBridge.Unpause() } -func (m *member) Blackhole() { m.grpcBridge.Blackhole() } -func (m *member) Unblackhole() { m.grpcBridge.Unblackhole() } - // NewClientV3 creates a new grpc client connection to the member func NewClientV3(m *member) (*clientv3.Client, error) { if m.grpcURL == "" { diff --git a/tests/integration/v3_watch_test.go b/tests/integration/v3_watch_test.go index fb2f510a6..323d0d72c 100644 --- a/tests/integration/v3_watch_test.go +++ b/tests/integration/v3_watch_test.go @@ -1062,7 +1062,7 @@ func TestV3WatchClose(t *testing.T) { }() } - clus.Members[0].DropConnections() + clus.Members[0].Bridge().DropConnections() wg.Wait() } From 77cc91e0b2ff5d35f5b407f118d875fb88318ae0 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 24 Sep 2021 14:32:56 +0200 Subject: [PATCH 31/43] test: Use unique number for grpc port --- tests/integration/cluster.go | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index 86f3e4593..deb82b021 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -73,6 +73,7 @@ const ( basePort = 21000 URLScheme = "unix" URLSchemeTLS = "unixs" + baseGRPCPort = 30000 ) var ( @@ -121,6 +122,10 @@ var ( defaultTokenJWT = fmt.Sprintf("jwt,pub-key=%s,priv-key=%s,sign-method=RS256,ttl=1s", MustAbsPath("../fixtures/server.crt"), MustAbsPath("../fixtures/server.key.insecure")) + + // uniqueNumber is used to generate unique port numbers + // 
Should only be accessed via atomic package methods. + uniqueNumber int32 ) type ClusterConfig struct { @@ -211,7 +216,7 @@ func newCluster(t testutil.TB, cfg *ClusterConfig) *cluster { c := &cluster{cfg: cfg} ms := make([]*member, cfg.Size) for i := 0; i < cfg.Size; i++ { - ms[i] = c.mustNewMember(t) + ms[i] = c.mustNewMember(t, int32(i)) } c.Members = ms if err := c.fillClusterForMembers(); err != nil { @@ -298,10 +303,11 @@ func (c *cluster) HTTPMembers() []client.Member { return ms } -func (c *cluster) mustNewMember(t testutil.TB) *member { +func (c *cluster) mustNewMember(t testutil.TB, number int32) *member { m := mustNewMember(t, memberConfig{ name: c.generateMemberName(), + memberNumber: number, authToken: c.cfg.AuthToken, peerTLS: c.cfg.PeerTLS, clientTLS: c.cfg.ClientTLS, @@ -332,7 +338,7 @@ func (c *cluster) mustNewMember(t testutil.TB) *member { // addMember return PeerURLs of the added member. func (c *cluster) addMember(t testutil.TB) types.URLs { - m := c.mustNewMember(t) + m := c.mustNewMember(t,0) scheme := schemeFromTLSInfo(c.cfg.PeerTLS) @@ -561,6 +567,8 @@ func NewListenerWithAddr(t testutil.TB, addr string) net.Listener { type member struct { config.ServerConfig + uniqNumber int32 + memberNumber int32 PeerListeners, ClientListeners []net.Listener grpcListener net.Listener // PeerTLSInfo enables peer TLS when set @@ -596,6 +604,7 @@ func (m *member) GRPCURL() string { return m.grpcURL } type memberConfig struct { name string + memberNumber int32 peerTLS *transport.TLSInfo clientTLS *transport.TLSInfo authToken string @@ -620,7 +629,10 @@ type memberConfig struct { // set, it will use https scheme to communicate between peers. func mustNewMember(t testutil.TB, mcfg memberConfig) *member { var err error - m := &member{} + m := &member{ + uniqNumber: atomic.AddInt32(&uniqueNumber, 1), + memberNumber: mcfg.memberNumber, + } peerScheme := schemeFromTLSInfo(mcfg.peerTLS) clientScheme := schemeFromTLSInfo(mcfg.clientTLS) @@ -782,11 +794,11 @@ func (m *member) Bridge() *bridge { func (m *member) grpcAddr() string { // prefix with localhost so cert has right domain - addr := "localhost:" + m.Name + host := "localhost" if m.useIP { // for IP-only TLS certs - addr = "127.0.0.1:" + m.Name + host = "127.0.0.1" } - return addr + return fmt.Sprintf("%s:%d", host, baseGRPCPort + m.uniqNumber * 10 + m.memberNumber) } type dialer struct { @@ -1462,7 +1474,7 @@ func (c *ClusterV3) GetLearnerMembers() ([]*pb.Member, error) { // AddAndLaunchLearnerMember creates a leaner member, adds it to cluster // via v3 MemberAdd API, and then launches the new member. func (c *ClusterV3) AddAndLaunchLearnerMember(t testutil.TB) { - m := c.mustNewMember(t) + m := c.mustNewMember(t, 0) m.isLearner = true scheme := schemeFromTLSInfo(c.cfg.PeerTLS) @@ -1563,7 +1575,7 @@ func (p SortableProtoMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], // MustNewMember creates a new member instance based on the response of V3 Member Add API. 
func (c *ClusterV3) MustNewMember(t testutil.TB, resp *clientv3.MemberAddResponse) *member { - m := c.mustNewMember(t) + m := c.mustNewMember(t,0) m.isLearner = resp.Member.IsLearner m.NewCluster = false From c3cc22c60d1abad5f052f62180b2fe4aa4daacbb Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 27 Sep 2021 13:01:22 +0200 Subject: [PATCH 32/43] tests: Allow configuring integration tests to use TCP --- tests/integration/cluster.go | 76 +++++++++++++++++++++++-------- tests/integration/v3_grpc_test.go | 3 ++ 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index deb82b021..025a40580 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -161,6 +161,8 @@ type ClusterConfig struct { // UseBridge adds bridge between client and grpc server. Should be used in tests that // want to manipulate connection or require connection not breaking despite server stop/restart. UseBridge bool + // UseTCP configures server listen on tcp socket. If disabled unix socket is used. + UseTCP bool EnableLeaseCheckpoint bool LeaseCheckpointInterval time.Duration @@ -216,7 +218,7 @@ func newCluster(t testutil.TB, cfg *ClusterConfig) *cluster { c := &cluster{cfg: cfg} ms := make([]*member, cfg.Size) for i := 0; i < cfg.Size; i++ { - ms[i] = c.mustNewMember(t, int32(i)) + ms[i] = c.mustNewMember(t, int64(i)) } c.Members = ms if err := c.fillClusterForMembers(); err != nil { @@ -303,11 +305,11 @@ func (c *cluster) HTTPMembers() []client.Member { return ms } -func (c *cluster) mustNewMember(t testutil.TB, number int32) *member { +func (c *cluster) mustNewMember(t testutil.TB, memberNumber int64) *member { m := mustNewMember(t, memberConfig{ name: c.generateMemberName(), - memberNumber: number, + memberNumber: memberNumber, authToken: c.cfg.AuthToken, peerTLS: c.cfg.PeerTLS, clientTLS: c.cfg.ClientTLS, @@ -323,6 +325,7 @@ func (c *cluster) mustNewMember(t testutil.TB, number int32) *member { clientMaxCallRecvMsgSize: c.cfg.ClientMaxCallRecvMsgSize, useIP: c.cfg.UseIP, useBridge: c.cfg.UseBridge, + useTCP: c.cfg.UseTCP, enableLeaseCheckpoint: c.cfg.EnableLeaseCheckpoint, leaseCheckpointInterval: c.cfg.LeaseCheckpointInterval, WatchProgressNotifyInterval: c.cfg.WatchProgressNotifyInterval, @@ -338,7 +341,7 @@ func (c *cluster) mustNewMember(t testutil.TB, number int32) *member { // addMember return PeerURLs of the added member. 
func (c *cluster) addMember(t testutil.TB) types.URLs { - m := c.mustNewMember(t,0) + m := c.mustNewMember(t, 0) scheme := schemeFromTLSInfo(c.cfg.PeerTLS) @@ -567,8 +570,8 @@ func NewListenerWithAddr(t testutil.TB, addr string) net.Listener { type member struct { config.ServerConfig - uniqNumber int32 - memberNumber int32 + UniqNumber int64 + MemberNumber int64 PeerListeners, ClientListeners []net.Listener grpcListener net.Listener // PeerTLSInfo enables peer TLS when set @@ -595,6 +598,7 @@ type member struct { clientMaxCallRecvMsgSize int useIP bool useBridge bool + useTCP bool isLearner bool closed bool @@ -604,7 +608,8 @@ func (m *member) GRPCURL() string { return m.grpcURL } type memberConfig struct { name string - memberNumber int32 + uniqNumber int64 + memberNumber int64 peerTLS *transport.TLSInfo clientTLS *transport.TLSInfo authToken string @@ -620,6 +625,7 @@ type memberConfig struct { clientMaxCallRecvMsgSize int useIP bool useBridge bool + useTCP bool enableLeaseCheckpoint bool leaseCheckpointInterval time.Duration WatchProgressNotifyInterval time.Duration @@ -630,8 +636,8 @@ type memberConfig struct { func mustNewMember(t testutil.TB, mcfg memberConfig) *member { var err error m := &member{ - uniqNumber: atomic.AddInt32(&uniqueNumber, 1), - memberNumber: mcfg.memberNumber, + MemberNumber: mcfg.memberNumber, + UniqNumber: atomic.AddInt64(&localListenCount, 1), } peerScheme := schemeFromTLSInfo(mcfg.peerTLS) @@ -717,6 +723,7 @@ func mustNewMember(t testutil.TB, mcfg memberConfig) *member { m.clientMaxCallRecvMsgSize = mcfg.clientMaxCallRecvMsgSize m.useIP = mcfg.useIP m.useBridge = mcfg.useBridge + m.useTCP = mcfg.useTCP m.EnableLeaseCheckpoint = mcfg.enableLeaseCheckpoint m.LeaseCheckpointInterval = mcfg.leaseCheckpointInterval @@ -750,13 +757,14 @@ func memberLogger(t testutil.TB, name string) *zap.Logger { // listenGRPC starts a grpc server over a unix domain socket on the member func (m *member) listenGRPC() error { // prefix with localhost so cert has right domain - grpcAddr := m.grpcAddr() + network, host, port := m.grpcAddr() + grpcAddr := host + ":" + port m.Logger.Info("LISTEN GRPC", zap.String("grpcAddr", grpcAddr), zap.String("m.Name", m.Name)) - grpcListener, err := transport.NewUnixListener(grpcAddr) + grpcListener, err := net.Listen(network, grpcAddr) if err != nil { return fmt.Errorf("listen failed on grpc socket %s (%v)", grpcAddr, err) } - m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + grpcAddr + m.grpcURL = fmt.Sprintf("%s://%s", m.clientScheme(), grpcAddr) if m.useBridge { _, err = m.addBridge() if err != nil { @@ -768,20 +776,36 @@ func (m *member) listenGRPC() error { return nil } +func (m *member) clientScheme() string { + switch { + case m.useTCP && m.ClientTLSInfo != nil: + return "https" + case m.useTCP && m.ClientTLSInfo == nil: + return "http" + case !m.useTCP && m.ClientTLSInfo != nil: + return "unixs" + case !m.useTCP && m.ClientTLSInfo == nil: + return "unix" + } + m.Logger.Panic("Failed to determine client schema") + return "" +} + func (m *member) addBridge() (*bridge, error) { - grpcAddr := m.grpcAddr() + network, host, port := m.grpcAddr() + grpcAddr := host + ":" + port bridgeAddr := grpcAddr + "0" m.Logger.Info("LISTEN BRIDGE", zap.String("grpc-address", bridgeAddr), zap.String("member", m.Name)) bridgeListener, err := transport.NewUnixListener(bridgeAddr) if err != nil { - return nil, fmt.Errorf("listen failed on bridge socket %s (%v)", grpcAddr, err) + return nil, fmt.Errorf("listen failed on bridge socket %s (%v)", bridgeAddr, 
err) } - m.grpcBridge, err = newBridge(dialer{network: "unix", addr: grpcAddr}, bridgeListener) + m.grpcBridge, err = newBridge(dialer{network: network, addr: grpcAddr}, bridgeListener) if err != nil { bridgeListener.Close() return nil, err } - m.grpcURL = schemeFromTLSInfo(m.ClientTLSInfo) + "://" + bridgeAddr + m.grpcURL = m.clientScheme() + "://" + bridgeAddr return m.grpcBridge, nil } @@ -792,13 +816,25 @@ func (m *member) Bridge() *bridge { return m.grpcBridge } -func (m *member) grpcAddr() string { +func (m *member) grpcAddr() (network, host, port string) { // prefix with localhost so cert has right domain - host := "localhost" + host = "localhost" if m.useIP { // for IP-only TLS certs host = "127.0.0.1" } - return fmt.Sprintf("%s:%d", host, baseGRPCPort + m.uniqNumber * 10 + m.memberNumber) + network = "unix" + if m.useTCP { + network = "tcp" + } + port = m.Name + if m.useTCP { + port = fmt.Sprintf("%d", GrpcPortNumber(m.UniqNumber, m.MemberNumber)) + } + return network, host, port +} + +func GrpcPortNumber(uniqNumber, memberNumber int64) int64 { + return baseGRPCPort + uniqNumber*10 + memberNumber } type dialer struct { @@ -1575,7 +1611,7 @@ func (p SortableProtoMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], // MustNewMember creates a new member instance based on the response of V3 Member Add API. func (c *ClusterV3) MustNewMember(t testutil.TB, resp *clientv3.MemberAddResponse) *member { - m := c.mustNewMember(t,0) + m := c.mustNewMember(t, 0) m.isLearner = resp.Member.IsLearner m.NewCluster = false diff --git a/tests/integration/v3_grpc_test.go b/tests/integration/v3_grpc_test.go index cbedafbe3..ca9e5c8ad 100644 --- a/tests/integration/v3_grpc_test.go +++ b/tests/integration/v3_grpc_test.go @@ -22,6 +22,7 @@ import ( "math/rand" "os" "reflect" + "strings" "testing" "time" @@ -1601,8 +1602,10 @@ func TestTLSGRPCRejectSecureClient(t *testing.T) { clus.Members[0].ClientTLSInfo = &testTLSInfo clus.Members[0].DialOptions = []grpc.DialOption{grpc.WithBlock()} + clus.Members[0].grpcURL = strings.Replace(clus.Members[0].grpcURL, "http://", "https://", 1) client, err := NewClientV3(clus.Members[0]) if client != nil || err == nil { + client.Close() t.Fatalf("expected no client") } else if err != context.DeadlineExceeded { t.Fatalf("unexpected error (%v)", err) From 90932324b140b84651d22d25ee01805bf2e32337 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 17 Sep 2021 12:10:20 +0200 Subject: [PATCH 33/43] client: Add grpc authority header integration tests --- pkg/grpc_testing/recorder.go | 69 ++++++++ server/embed/etcd.go | 2 +- server/embed/serve.go | 4 +- server/etcdserver/api/v3rpc/grpc.go | 6 +- server/etcdserver/api/v3rpc/interceptor.go | 2 +- tests/integration/cluster.go | 42 ++++- tests/integration/grpc_test.go | 182 +++++++++++++++++++++ 7 files changed, 296 insertions(+), 11 deletions(-) create mode 100644 pkg/grpc_testing/recorder.go create mode 100644 tests/integration/grpc_test.go diff --git a/pkg/grpc_testing/recorder.go b/pkg/grpc_testing/recorder.go new file mode 100644 index 000000000..d6b6d2aac --- /dev/null +++ b/pkg/grpc_testing/recorder.go @@ -0,0 +1,69 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpc_testing + +import ( + "context" + "sync" + + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type GrpcRecorder struct { + mux sync.RWMutex + requests []RequestInfo +} + +type RequestInfo struct { + FullMethod string + Authority string +} + +func (ri *GrpcRecorder) UnaryInterceptor() grpc.UnaryServerInterceptor { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + ri.record(toRequestInfo(ctx, info)) + resp, err := handler(ctx, req) + return resp, err + } +} + +func (ri *GrpcRecorder) RecordedRequests() []RequestInfo { + ri.mux.RLock() + defer ri.mux.RUnlock() + reqs := make([]RequestInfo, len(ri.requests)) + copy(reqs, ri.requests) + return reqs +} + +func toRequestInfo(ctx context.Context, info *grpc.UnaryServerInfo) RequestInfo { + req := RequestInfo{ + FullMethod: info.FullMethod, + } + md, ok := metadata.FromIncomingContext(ctx) + if ok { + as := md.Get(":authority") + if len(as) != 0 { + req.Authority = as[0] + } + } + return req +} + +func (ri *GrpcRecorder) record(r RequestInfo) { + ri.mux.Lock() + defer ri.mux.Unlock() + ri.requests = append(ri.requests, r) +} diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 2e20566de..feb846ea1 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -539,7 +539,7 @@ func (e *Etcd) servePeers() (err error) { for _, p := range e.Peers { u := p.Listener.Addr().String() - gs := v3rpc.Server(e.Server, peerTLScfg) + gs := v3rpc.Server(e.Server, peerTLScfg, nil) m := cmux.New(p.Listener) go gs.Serve(m.Match(cmux.HTTP2())) srv := &http.Server{ diff --git a/server/embed/serve.go b/server/embed/serve.go index 17b55384e..c3e786321 100644 --- a/server/embed/serve.go +++ b/server/embed/serve.go @@ -110,7 +110,7 @@ func (sctx *serveCtx) serve( }() if sctx.insecure { - gs = v3rpc.Server(s, nil, gopts...) + gs = v3rpc.Server(s, nil, nil, gopts...) v3electionpb.RegisterElectionServer(gs, servElection) v3lockpb.RegisterLockServer(gs, servLock) if sctx.serviceRegister != nil { @@ -148,7 +148,7 @@ func (sctx *serveCtx) serve( if tlsErr != nil { return tlsErr } - gs = v3rpc.Server(s, tlscfg, gopts...) + gs = v3rpc.Server(s, tlscfg, nil, gopts...) 
v3electionpb.RegisterElectionServer(gs, servElection) v3lockpb.RegisterLockServer(gs, servLock) if sctx.serviceRegister != nil { diff --git a/server/etcdserver/api/v3rpc/grpc.go b/server/etcdserver/api/v3rpc/grpc.go index 26c52b385..ea3dd7570 100644 --- a/server/etcdserver/api/v3rpc/grpc.go +++ b/server/etcdserver/api/v3rpc/grpc.go @@ -36,19 +36,21 @@ const ( maxSendBytes = math.MaxInt32 ) -func Server(s *etcdserver.EtcdServer, tls *tls.Config, gopts ...grpc.ServerOption) *grpc.Server { +func Server(s *etcdserver.EtcdServer, tls *tls.Config, interceptor grpc.UnaryServerInterceptor, gopts ...grpc.ServerOption) *grpc.Server { var opts []grpc.ServerOption opts = append(opts, grpc.CustomCodec(&codec{})) if tls != nil { bundle := credentials.NewBundle(credentials.Config{TLSConfig: tls}) opts = append(opts, grpc.Creds(bundle.TransportCredentials())) } - chainUnaryInterceptors := []grpc.UnaryServerInterceptor{ newLogUnaryInterceptor(s), newUnaryInterceptor(s), grpc_prometheus.UnaryServerInterceptor, } + if interceptor != nil { + chainUnaryInterceptors = append(chainUnaryInterceptors, interceptor) + } chainStreamInterceptors := []grpc.StreamServerInterceptor{ newStreamInterceptor(s), diff --git a/server/etcdserver/api/v3rpc/interceptor.go b/server/etcdserver/api/v3rpc/interceptor.go index 0d4d5c329..0d41ef527 100644 --- a/server/etcdserver/api/v3rpc/interceptor.go +++ b/server/etcdserver/api/v3rpc/interceptor.go @@ -76,7 +76,7 @@ func newLogUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerIntercepto startTime := time.Now() resp, err := handler(ctx, req) lg := s.Logger() - if lg != nil { // acquire stats if debug level is enabled or request is expensive + if lg != nil { // acquire stats if debug level is enabled or RequestInfo is expensive defer logUnaryRequestStats(ctx, lg, s.Cfg.WarningUnaryRequestDuration, info, startTime, req, resp) } return resp, err diff --git a/tests/integration/cluster.go b/tests/integration/cluster.go index 025a40580..528bcb902 100644 --- a/tests/integration/cluster.go +++ b/tests/integration/cluster.go @@ -39,6 +39,7 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/client/v2" "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/pkg/v3/grpc_testing" "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/embed" @@ -602,6 +603,8 @@ type member struct { isLearner bool closed bool + + grpcServerRecorder *grpc_testing.GrpcRecorder } func (m *member) GRPCURL() string { return m.grpcURL } @@ -734,7 +737,7 @@ func mustNewMember(t testutil.TB, mcfg memberConfig) *member { m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration m.V2Deprecation = config.V2_DEPR_DEFAULT - + m.grpcServerRecorder = &grpc_testing.GrpcRecorder{} m.Logger = memberLogger(t, mcfg.name) t.Cleanup(func() { // if we didn't cleanup the logger, the consecutive test @@ -947,8 +950,8 @@ func (m *member) Launch() error { return err } } - m.grpcServer = v3rpc.Server(m.s, tlscfg, m.grpcServerOpts...) - m.grpcServerPeer = v3rpc.Server(m.s, peerTLScfg) + m.grpcServer = v3rpc.Server(m.s, tlscfg, m.grpcServerRecorder.UnaryInterceptor(), m.grpcServerOpts...) 
+ m.grpcServerPeer = v3rpc.Server(m.s, peerTLScfg, m.grpcServerRecorder.UnaryInterceptor()) m.serverClient = v3client.New(m.s) lockpb.RegisterLockServer(m.grpcServer, v3lock.NewLockServer(m.serverClient)) epb.RegisterElectionServer(m.grpcServer, v3election.NewElectionServer(m.serverClient)) @@ -1083,6 +1086,10 @@ func (m *member) Launch() error { return nil } +func (m *member) RecordedRequests() []grpc_testing.RequestInfo { + return m.grpcServerRecorder.RecordedRequests() +} + func (m *member) WaitOK(t testutil.TB) { m.WaitStarted(t) for m.s.Leader() == 0 { @@ -1372,8 +1379,9 @@ func (p SortableMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] type ClusterV3 struct { *cluster - mu sync.Mutex - clients []*clientv3.Client + mu sync.Mutex + clients []*clientv3.Client + clusterClient *clientv3.Client } // NewClusterV3 returns a launched cluster with a grpc client connection @@ -1419,6 +1427,11 @@ func (c *ClusterV3) Terminate(t testutil.TB) { t.Error(err) } } + if c.clusterClient != nil { + if err := c.clusterClient.Close(); err != nil { + t.Error(err) + } + } c.mu.Unlock() c.cluster.Terminate(t) } @@ -1431,6 +1444,25 @@ func (c *ClusterV3) Client(i int) *clientv3.Client { return c.clients[i] } +func (c *ClusterV3) ClusterClient() (client *clientv3.Client, err error) { + if c.clusterClient == nil { + endpoints := []string{} + for _, m := range c.Members { + endpoints = append(endpoints, m.grpcURL) + } + cfg := clientv3.Config{ + Endpoints: endpoints, + DialTimeout: 5 * time.Second, + DialOptions: []grpc.DialOption{grpc.WithBlock()}, + } + c.clusterClient, err = newClientV3(cfg) + if err != nil { + return nil, err + } + } + return c.clusterClient, nil +} + // NewClientV3 creates a new grpc client connection to the member func (c *ClusterV3) NewClientV3(memberIndex int) (*clientv3.Client, error) { return NewClientV3(c.Members[memberIndex]) diff --git a/tests/integration/grpc_test.go b/tests/integration/grpc_test.go new file mode 100644 index 000000000..49cbd1df5 --- /dev/null +++ b/tests/integration/grpc_test.go @@ -0,0 +1,182 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package integration + +import ( + "context" + tls "crypto/tls" + "fmt" + "strings" + "testing" + "time" + + clientv3 "go.etcd.io/etcd/client/v3" + "google.golang.org/grpc" +) + +func TestAuthority(t *testing.T) { + tcs := []struct { + name string + useTCP bool + useTLS bool + // Pattern used to generate endpoints for client. Fields filled + // %d - will be filled with member grpc port + // %s - will be filled with member name + clientURLPattern string + + // Pattern used to validate authority received by server. 
Fields filled: + // %s - list of endpoints concatenated with ";" + expectAuthorityPattern string + }{ + { + name: "unix:path", + clientURLPattern: "unix:localhost:%s", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "unix://absolute_path", + clientURLPattern: "unix://localhost:%s", + expectAuthorityPattern: "#initially=[%s]", + }, + // "unixs" is not standard schema supported by etcd + { + name: "unixs:absolute_path", + useTLS: true, + clientURLPattern: "unixs:localhost:%s", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "unixs://absolute_path", + useTLS: true, + clientURLPattern: "unixs://localhost:%s", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "http://domain[:port]", + useTCP: true, + clientURLPattern: "http://localhost:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://domain[:port]", + useTLS: true, + useTCP: true, + clientURLPattern: "https://localhost:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "http://address[:port]", + useTCP: true, + clientURLPattern: "http://127.0.0.1:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://address[:port]", + useTCP: true, + useTLS: true, + clientURLPattern: "https://127.0.0.1:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + } + for _, tc := range tcs { + for _, clusterSize := range []int{1, 3} { + t.Run(fmt.Sprintf("Size: %d, Scenario: %q", clusterSize, tc.name), func(t *testing.T) { + BeforeTest(t) + cfg := ClusterConfig{ + Size: clusterSize, + UseTCP: tc.useTCP, + UseIP: tc.useTCP, + } + cfg, tlsConfig := setupTLS(t, tc.useTLS, cfg) + clus := NewClusterV3(t, &cfg) + defer clus.Terminate(t) + endpoints := templateEndpoints(t, tc.clientURLPattern, clus) + + kv := setupClient(t, tc.clientURLPattern, clus, tlsConfig) + defer kv.Close() + + _, err := kv.Put(context.TODO(), "foo", "bar") + if err != nil { + t.Fatal(err) + } + + assertAuthority(t, fmt.Sprintf(tc.expectAuthorityPattern, strings.Join(endpoints, ";")), clus) + }) + } + } +} + +func setupTLS(t *testing.T, useTLS bool, cfg ClusterConfig) (ClusterConfig, *tls.Config) { + t.Helper() + if useTLS { + cfg.ClientTLS = &testTLSInfo + tlsConfig, err := testTLSInfo.ClientConfig() + if err != nil { + t.Fatal(err) + } + return cfg, tlsConfig + } + return cfg, nil +} + +func setupClient(t *testing.T, endpointPattern string, clus *ClusterV3, tlsConfig *tls.Config) *clientv3.Client { + t.Helper() + endpoints := templateEndpoints(t, endpointPattern, clus) + kv, err := clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: 5 * time.Second, + DialOptions: []grpc.DialOption{grpc.WithBlock()}, + TLS: tlsConfig, + }) + if err != nil { + t.Fatal(err) + } + return kv +} + +func templateEndpoints(t *testing.T, pattern string, clus *ClusterV3) []string { + t.Helper() + endpoints := []string{} + for _, m := range clus.Members { + ent := pattern + if strings.Contains(ent, "%d") { + ent = fmt.Sprintf(ent, GrpcPortNumber(m.UniqNumber, m.MemberNumber)) + } + if strings.Contains(ent, "%s") { + ent = fmt.Sprintf(ent, m.Name) + } + if strings.Contains(ent, "%") { + t.Fatalf("Failed to template pattern, %% symbol left %q", ent) + } + endpoints = append(endpoints, ent) + } + return endpoints +} + +func assertAuthority(t *testing.T, expectedAuthority string, clus *ClusterV3) { + t.Helper() + requestsFound := 0 + for _, m := range clus.Members { + for _, r := range m.RecordedRequests() { + requestsFound++ + if r.Authority != expectedAuthority { + t.Errorf("Got unexpected authority header, 
member: %q, request: %q, got authority: %q, expected %q", m.Name, r.FullMethod, r.Authority, expectedAuthority) + } + } + } + if requestsFound == 0 { + t.Errorf("Expected at least one request") + } +} From ec419f8613df9d333c661d1b979527f1f57c7838 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 27 Sep 2021 18:09:17 +0200 Subject: [PATCH 34/43] tests: Add grpc authority e2e tests --- tests/e2e/cluster_proxy_test.go | 4 + tests/e2e/cluster_test.go | 2 + tests/e2e/ctl_v3_grpc_test.go | 213 ++++++++++++++++++++++++++++++++ tests/e2e/etcd_process.go | 15 ++- 4 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 tests/e2e/ctl_v3_grpc_test.go diff --git a/tests/e2e/cluster_proxy_test.go b/tests/e2e/cluster_proxy_test.go index f11db67ac..fd7924835 100644 --- a/tests/e2e/cluster_proxy_test.go +++ b/tests/e2e/cluster_proxy_test.go @@ -115,6 +115,10 @@ func (p *proxyEtcdProcess) WithStopSignal(sig os.Signal) os.Signal { return p.etcdProc.WithStopSignal(sig) } +func (p *proxyEtcdProcess) Logs() logsExpect { + return p.etcdProc.Logs() +} + type proxyProc struct { lg *zap.Logger execPath string diff --git a/tests/e2e/cluster_test.go b/tests/e2e/cluster_test.go index 4b3993d5c..eb39b3afe 100644 --- a/tests/e2e/cluster_test.go +++ b/tests/e2e/cluster_test.go @@ -144,6 +144,7 @@ type etcdProcessClusterConfig struct { execPath string dataDirPath string keepDataDir bool + envVars map[string]string clusterSize int @@ -318,6 +319,7 @@ func (cfg *etcdProcessClusterConfig) etcdServerProcessConfigs(tb testing.TB) []* lg: lg, execPath: cfg.execPath, args: args, + envVars: cfg.envVars, tlsArgs: cfg.tlsArgs(), dataDirPath: dataDirPath, keepDataDir: cfg.keepDataDir, diff --git a/tests/e2e/ctl_v3_grpc_test.go b/tests/e2e/ctl_v3_grpc_test.go new file mode 100644 index 000000000..b54b773ec --- /dev/null +++ b/tests/e2e/ctl_v3_grpc_test.go @@ -0,0 +1,213 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !cluster_proxy +// +build !cluster_proxy + +package e2e + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "go.etcd.io/etcd/client/pkg/v3/testutil" +) + +func TestAuthority(t *testing.T) { + tcs := []struct { + name string + useTLS bool + useInsecureTLS bool + // Pattern used to generate endpoints for client. Fields filled + // %d - will be filled with member grpc port + clientURLPattern string + + // Pattern used to validate authority received by server. 
Fields filled: + // %s - list of endpoints concatenated with ";" + expectAuthorityPattern string + }{ + { + name: "http://domain[:port]", + clientURLPattern: "http://localhost:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "http://address[:port]", + clientURLPattern: "http://127.0.0.1:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://domain[:port] insecure", + useTLS: true, + useInsecureTLS: true, + clientURLPattern: "https://localhost:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://address[:port] insecure", + useTLS: true, + useInsecureTLS: true, + clientURLPattern: "https://127.0.0.1:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://domain[:port]", + useTLS: true, + clientURLPattern: "https://localhost:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + { + name: "https://address[:port]", + useTLS: true, + clientURLPattern: "https://127.0.0.1:%d", + expectAuthorityPattern: "#initially=[%s]", + }, + } + for _, tc := range tcs { + for _, clusterSize := range []int{1, 3} { + t.Run(fmt.Sprintf("Size: %d, Scenario: %q", clusterSize, tc.name), func(t *testing.T) { + BeforeTest(t) + + cfg := newConfigNoTLS() + cfg.clusterSize = clusterSize + if tc.useTLS { + cfg.clientTLS = clientTLS + } + cfg.isClientAutoTLS = tc.useInsecureTLS + // Enable debug mode to get logs with http2 headers (including authority) + cfg.envVars = map[string]string{"GODEBUG": "http2debug=2"} + + epc, err := newEtcdProcessCluster(t, cfg) + if err != nil { + t.Fatalf("could not start etcd process cluster (%v)", err) + } + defer epc.Close() + endpoints := templateEndpoints(t, tc.clientURLPattern, epc) + + client := clusterEtcdctlV3(cfg, endpoints) + err = client.Put("foo", "bar") + if err != nil { + t.Fatal(err) + } + + executeWithTimeout(t, 5*time.Second, func() { + assertAuthority(t, fmt.Sprintf(tc.expectAuthorityPattern, strings.Join(endpoints, ";")), epc) + }) + }) + + } + } +} + +func templateEndpoints(t *testing.T, pattern string, clus *etcdProcessCluster) []string { + t.Helper() + endpoints := []string{} + for i := 0; i < clus.cfg.clusterSize; i++ { + ent := pattern + if strings.Contains(ent, "%d") { + ent = fmt.Sprintf(ent, etcdProcessBasePort+i*5) + } + if strings.Contains(ent, "%") { + t.Fatalf("Failed to template pattern, %% symbol left %q", ent) + } + endpoints = append(endpoints, ent) + } + return endpoints +} + +func assertAuthority(t *testing.T, expectAurhority string, clus *etcdProcessCluster) { + logs := []logsExpect{} + for _, proc := range clus.procs { + logs = append(logs, proc.Logs()) + } + line := firstMatch(t, `http2: decoded hpack field header field ":authority"`, logs...) 
+ line = strings.TrimSuffix(line, "\n") + line = strings.TrimSuffix(line, "\r") + expectLine := fmt.Sprintf(`http2: decoded hpack field header field ":authority" = %q`, expectAurhority) + assert.True(t, strings.HasSuffix(line, expectLine), fmt.Sprintf("Got %q expected suffix %q", line, expectLine)) +} + +func firstMatch(t *testing.T, expectLine string, logs ...logsExpect) string { + t.Helper() + match := make(chan string, len(logs)) + for i := range logs { + go func(l logsExpect) { + line, _ := l.Expect(expectLine) + match <- line + }(logs[i]) + } + return <-match +} + +func executeWithTimeout(t *testing.T, timeout time.Duration, f func()) { + donec := make(chan struct{}) + go func() { + defer close(donec) + f() + }() + + select { + case <-time.After(timeout): + testutil.FatalStack(t, fmt.Sprintf("test timed out after %v", timeout)) + case <-donec: + } +} + +type etcdctlV3 struct { + cfg *etcdProcessClusterConfig + endpoints []string +} + +func clusterEtcdctlV3(cfg *etcdProcessClusterConfig, endpoints []string) *etcdctlV3 { + return &etcdctlV3{ + cfg: cfg, + endpoints: endpoints, + } +} + +func (ctl *etcdctlV3) Put(key, value string) error { + return ctl.runCmd("put", key, value) +} + +func (ctl *etcdctlV3) runCmd(args ...string) error { + cmdArgs := []string{ctlBinPath + "3"} + for k, v := range ctl.flags() { + cmdArgs = append(cmdArgs, fmt.Sprintf("--%s=%s", k, v)) + } + cmdArgs = append(cmdArgs, args...) + return spawnWithExpect(cmdArgs, "OK") +} + +func (ctl *etcdctlV3) flags() map[string]string { + fmap := make(map[string]string) + if ctl.cfg.clientTLS == clientTLS { + if ctl.cfg.isClientAutoTLS { + fmap["insecure-transport"] = "false" + fmap["insecure-skip-tls-verify"] = "true" + } else if ctl.cfg.isClientCRL { + fmap["cacert"] = caPath + fmap["cert"] = revokedCertPath + fmap["key"] = revokedPrivateKeyPath + } else { + fmap["cacert"] = caPath + fmap["cert"] = certPath + fmap["key"] = privateKeyPath + } + } + fmap["endpoints"] = strings.Join(ctl.endpoints, ",") + return fmap +} diff --git a/tests/e2e/etcd_process.go b/tests/e2e/etcd_process.go index c61001cec..6fbb595e0 100644 --- a/tests/e2e/etcd_process.go +++ b/tests/e2e/etcd_process.go @@ -43,6 +43,11 @@ type etcdProcess interface { Close() error WithStopSignal(sig os.Signal) os.Signal Config() *etcdServerProcessConfig + Logs() logsExpect +} + +type logsExpect interface { + Expect(string) (string, error) } type etcdServerProcess struct { @@ -56,6 +61,7 @@ type etcdServerProcessConfig struct { execPath string args []string tlsArgs []string + envVars map[string]string dataDirPath string keepDataDir bool @@ -92,7 +98,7 @@ func (ep *etcdServerProcess) Start() error { panic("already started") } ep.cfg.lg.Info("starting server...", zap.String("name", ep.cfg.name)) - proc, err := spawnCmdWithLogger(ep.cfg.lg, append([]string{ep.cfg.execPath}, ep.cfg.args...), nil) + proc, err := spawnCmdWithLogger(ep.cfg.lg, append([]string{ep.cfg.execPath}, ep.cfg.args...), ep.cfg.envVars) if err != nil { return err } @@ -163,3 +169,10 @@ func (ep *etcdServerProcess) waitReady() error { } func (ep *etcdServerProcess) Config() *etcdServerProcessConfig { return ep.cfg } + +func (ep *etcdServerProcess) Logs() logsExpect { + if ep.proc == nil { + ep.cfg.lg.Panic("Please grap logs before process is stopped") + } + return ep.proc +} From c929a917b6aa5a731a9bcdf8558678a4f6fcb227 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 17 Sep 2021 12:20:59 +0200 Subject: [PATCH 35/43] client: Use first endpoint as http2 authority header --- 
client/v3/client.go | 18 ++++++++++++++--- tests/e2e/ctl_v3_grpc_test.go | 16 +++++++-------- tests/integration/grpc_test.go | 37 ++++++++++++++++++++++++---------- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/client/v3/client.go b/client/v3/client.go index 530b0399f..c39f00421 100644 --- a/client/v3/client.go +++ b/client/v3/client.go @@ -297,9 +297,7 @@ func (c *Client) dial(creds grpccredentials.TransportCredentials, dopts ...grpc. dctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout) defer cancel() // TODO: Is this right for cases where grpc.WithBlock() is not set on the dial options? } - - initialEndpoints := strings.Join(c.Endpoints(), ";") - target := fmt.Sprintf("%s://%p/#initially=[%s]", resolver.Schema, c, initialEndpoints) + target := fmt.Sprintf("%s://%p/%s", resolver.Schema, c, authority(c.endpoints[0])) conn, err := grpc.DialContext(dctx, target, opts...) if err != nil { return nil, err @@ -307,6 +305,20 @@ func (c *Client) dial(creds grpccredentials.TransportCredentials, dopts ...grpc. return conn, nil } +func authority(endpoint string) string { + spl := strings.SplitN(endpoint, "://", 2) + if len(spl) < 2 { + if strings.HasPrefix(endpoint, "unix:") { + return endpoint[len("unix:"):] + } + if strings.HasPrefix(endpoint, "unixs:") { + return endpoint[len("unixs:"):] + } + return endpoint + } + return spl[1] +} + func (c *Client) credentialsForEndpoint(ep string) grpccredentials.TransportCredentials { r := endpoint.RequiresCredentials(ep) switch r { diff --git a/tests/e2e/ctl_v3_grpc_test.go b/tests/e2e/ctl_v3_grpc_test.go index b54b773ec..b0f824552 100644 --- a/tests/e2e/ctl_v3_grpc_test.go +++ b/tests/e2e/ctl_v3_grpc_test.go @@ -37,44 +37,44 @@ func TestAuthority(t *testing.T) { clientURLPattern string // Pattern used to validate authority received by server. 
Fields filled: - // %s - list of endpoints concatenated with ";" + // %d - will be filled with first member grpc port expectAuthorityPattern string }{ { name: "http://domain[:port]", clientURLPattern: "http://localhost:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%d", }, { name: "http://address[:port]", clientURLPattern: "http://127.0.0.1:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "127.0.0.1:%d", }, { name: "https://domain[:port] insecure", useTLS: true, useInsecureTLS: true, clientURLPattern: "https://localhost:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%d", }, { name: "https://address[:port] insecure", useTLS: true, useInsecureTLS: true, clientURLPattern: "https://127.0.0.1:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "127.0.0.1:%d", }, { name: "https://domain[:port]", useTLS: true, clientURLPattern: "https://localhost:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%d", }, { name: "https://address[:port]", useTLS: true, clientURLPattern: "https://127.0.0.1:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "127.0.0.1:%d", }, } for _, tc := range tcs { @@ -105,7 +105,7 @@ func TestAuthority(t *testing.T) { } executeWithTimeout(t, 5*time.Second, func() { - assertAuthority(t, fmt.Sprintf(tc.expectAuthorityPattern, strings.Join(endpoints, ";")), epc) + assertAuthority(t, fmt.Sprintf(tc.expectAuthorityPattern, 20000), epc) }) }) diff --git a/tests/integration/grpc_test.go b/tests/integration/grpc_test.go index 49cbd1df5..eb71191a3 100644 --- a/tests/integration/grpc_test.go +++ b/tests/integration/grpc_test.go @@ -37,57 +37,58 @@ func TestAuthority(t *testing.T) { clientURLPattern string // Pattern used to validate authority received by server. 
Fields filled: - // %s - list of endpoints concatenated with ";" + // %d - will be filled with first member grpc port + // %s - will be filled with first member name expectAuthorityPattern string }{ { name: "unix:path", clientURLPattern: "unix:localhost:%s", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%s", }, { name: "unix://absolute_path", clientURLPattern: "unix://localhost:%s", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%s", }, // "unixs" is not standard schema supported by etcd { name: "unixs:absolute_path", useTLS: true, clientURLPattern: "unixs:localhost:%s", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%s", }, { name: "unixs://absolute_path", useTLS: true, clientURLPattern: "unixs://localhost:%s", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%s", }, { name: "http://domain[:port]", useTCP: true, clientURLPattern: "http://localhost:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%d", }, { name: "https://domain[:port]", useTLS: true, useTCP: true, clientURLPattern: "https://localhost:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "localhost:%d", }, { name: "http://address[:port]", useTCP: true, clientURLPattern: "http://127.0.0.1:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "127.0.0.1:%d", }, { name: "https://address[:port]", useTCP: true, useTLS: true, clientURLPattern: "https://127.0.0.1:%d", - expectAuthorityPattern: "#initially=[%s]", + expectAuthorityPattern: "127.0.0.1:%d", }, } for _, tc := range tcs { @@ -102,7 +103,6 @@ func TestAuthority(t *testing.T) { cfg, tlsConfig := setupTLS(t, tc.useTLS, cfg) clus := NewClusterV3(t, &cfg) defer clus.Terminate(t) - endpoints := templateEndpoints(t, tc.clientURLPattern, clus) kv := setupClient(t, tc.clientURLPattern, clus, tlsConfig) defer kv.Close() @@ -112,7 +112,7 @@ func TestAuthority(t *testing.T) { t.Fatal(err) } - assertAuthority(t, fmt.Sprintf(tc.expectAuthorityPattern, strings.Join(endpoints, ";")), clus) + assertAuthority(t, templateAuthority(t, tc.expectAuthorityPattern, clus.Members[0]), clus) }) } } @@ -165,6 +165,21 @@ func templateEndpoints(t *testing.T, pattern string, clus *ClusterV3) []string { return endpoints } +func templateAuthority(t *testing.T, pattern string, m *member) string { + t.Helper() + authority := pattern + if strings.Contains(authority, "%d") { + authority = fmt.Sprintf(authority, GrpcPortNumber(m.UniqNumber, m.MemberNumber)) + } + if strings.Contains(authority, "%s") { + authority = fmt.Sprintf(authority, m.Name) + } + if strings.Contains(authority, "%") { + t.Fatalf("Failed to template pattern, %% symbol left %q", authority) + } + return authority +} + func assertAuthority(t *testing.T, expectedAuthority string, clus *ClusterV3) { t.Helper() requestsFound := 0 From d52d7fc21e15bf8d3465fae2f555242ef63ec819 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 29 Sep 2021 13:07:43 +0200 Subject: [PATCH 36/43] Update changelog with information about authority header fix --- CHANGELOG-3.5.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index 974c1bde6..e2a52bff0 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -16,6 +16,13 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.5.1) and - Fix [self-signed-cert-validity parameter cannot be specified in the config 
file](https://github.com/etcd-io/etcd/pull/13237). +### etcd client + +- [Fix etcd client sends invalid :authority header](https://github.com/etcd-io/etcd/issues/13192) + +### package clientv3 + +- Endpoints self identify now as `etcd-endpoints://{id}/{authority}` where authority is based on first endpoint passed, for example `etcd-endpoints://0xc0009d8540/localhost:2079`
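To make the new dial-target format concrete, here is a minimal sketch of how the authority portion is derived from the first configured endpoint, mirroring the `authority` helper added to `client/v3/client.go` in the patch above; the `authorityFromEndpoint` name, the sample endpoints, and the fixed `0xc0009d8540` id are illustrative only (in practice the id is the client's pointer value).

```go
package main

import (
	"fmt"
	"strings"
)

// authorityFromEndpoint mirrors the scheme stripping done by the client: any
// "scheme://" prefix, as well as the "unix:"/"unixs:" shorthand prefixes, is
// removed so that only host:port (or the socket path) is used as :authority.
func authorityFromEndpoint(endpoint string) string {
	if spl := strings.SplitN(endpoint, "://", 2); len(spl) == 2 {
		return spl[1]
	}
	for _, prefix := range []string{"unixs:", "unix:"} {
		if strings.HasPrefix(endpoint, prefix) {
			return strings.TrimPrefix(endpoint, prefix)
		}
	}
	return endpoint
}

func main() {
	// The gRPC dial target then has the form etcd-endpoints://{client-pointer}/{authority}.
	for _, ep := range []string{"https://127.0.0.1:2379", "unix:localhost:21001", "etcd-1.example.com:2379"} {
		fmt.Printf("%-28s -> etcd-endpoints://0xc0009d8540/%s\n", ep, authorityFromEndpoint(ep))
	}
}
```

Sending only `host:port` (or the socket path) as the HTTP/2 `:authority` header, rather than the previous `#initially=[...]` endpoint list, is exactly what the integration and e2e tests in the earlier patches assert against.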
From aab7829048494ee7bd3083f7a63ea00fb25bef69 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 30 Sep 2021 08:19:37 -0400 Subject: [PATCH 37/43] Dockerfile: bump debian bullseye-20210927 fixes: CVE-2021-3711, CVE-2021-35942, CVE-2019-9893 Signed-off-by: Sam Batschelet --- Dockerfile-release.amd64 | 3 ++- Dockerfile-release.arm64 | 3 ++- Dockerfile-release.ppc64le | 3 ++- Dockerfile-release.s390x | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Dockerfile-release.amd64 b/Dockerfile-release.amd64 index 9bd425887..ad9bd068f 100644 --- a/Dockerfile-release.amd64 +++ b/Dockerfile-release.amd64 @@ -1,4 +1,5 @@ -FROM k8s.gcr.io/build-image/debian-base:buster-v1.4.0 +# TODO: move to k8s.gcr.io/build-image/debian-base:bullseye-v1.y.z when patched +FROM debian:bullseye-20210927 ADD etcd /usr/local/bin/ ADD etcdctl /usr/local/bin/ diff --git a/Dockerfile-release.arm64 b/Dockerfile-release.arm64 index d04d79041..c023d215f 100644 --- a/Dockerfile-release.arm64 +++ b/Dockerfile-release.arm64 @@ -1,4 +1,5 @@ -FROM k8s.gcr.io/build-image/debian-base-arm64:buster-v1.4.0 +# TODO: move to k8s.gcr.io/build-image/debian-base-arm64:bullseye-1.y.z when patched +FROM arm64v8/debian:bullseye-20210927 ADD etcd /usr/local/bin/ ADD etcdctl /usr/local/bin/ diff --git a/Dockerfile-release.ppc64le b/Dockerfile-release.ppc64le index 51adb7ae3..6e1e94625 100644 --- a/Dockerfile-release.ppc64le +++ b/Dockerfile-release.ppc64le @@ -1,4 +1,5 @@ -FROM k8s.gcr.io/build-image/debian-base-ppc64le:buster-v1.4.0 +# TODO: move to k8s.gcr.io/build-image/debian-base-ppc64le:bullseye-1.y.z when patched +FROM ppc64le/debian:bullseye-20210927 ADD etcd /usr/local/bin/ ADD etcdctl /usr/local/bin/ diff --git a/Dockerfile-release.s390x b/Dockerfile-release.s390x index a96d45534..a98c6452a 100644 --- a/Dockerfile-release.s390x +++ b/Dockerfile-release.s390x @@ -1,4 +1,5 @@ -FROM k8s.gcr.io/build-image/debian-base-s390x:buster-v1.4.0 +# TODO: move to k8s.gcr.io/build-image/debian-base-s390x:bullseye-1.y.z when patched +FROM s390x/debian:bullseye-20210927 ADD etcd /usr/local/bin/ ADD etcdctl /usr/local/bin/ From 2d7a7d7da4dea74f5bbd0248a3a6ed47a042024f Mon Sep 17 00:00:00 2001 From: Geeta Gharpure Date: Sat, 2 Oct 2021 14:52:29 -0700 Subject: [PATCH 38/43] CHANGELOG - etcd_disk_defrag_inflight Add a gauge to indicate if defrag is active --- CHANGELOG-3.5.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index 974c1bde6..05b5e9a93 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -122,6 +122,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738). - Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3). - Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214). +- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371). 
### etcd server From 741e19fb507622cec418c6c855280bacb591163c Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sun, 3 Oct 2021 22:22:44 -0400 Subject: [PATCH 39/43] CHANGELOG: 3.4.17 Signed-off-by: Sam Batschelet --- CHANGELOG-3.4.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/CHANGELOG-3.4.md b/CHANGELOG-3.4.md index ded93935e..551f97b1f 100644 --- a/CHANGELOG-3.4.md +++ b/CHANGELOG-3.4.md @@ -7,6 +7,28 @@ The minimum recommended etcd versions to run in **production** are 3.2.28+, 3.3.
+## v3.4.17 (2021-10-03)
+
+See [code changes](https://github.com/etcd-io/etcd/compare/v3.4.16...v3.4.17) and [v3.4 upgrade guide](https://etcd.io/docs/latest/upgrades/upgrade_3_4/) for any breaking changes.
+
+### `etcdctl`
+
+- Fix [etcdctl check datascale command](https://github.com/etcd-io/etcd/pull/11896) to work with https endpoints.
+
+### gRPC gateway
+
+- Add [`MaxCallRecvMsgSize`](https://github.com/etcd-io/etcd/pull/13077) support for http client.
+
+### Dependency
+
+- Replace [`github.com/dgrijalva/jwt-go` with `github.com/golang-jwt/jwt`](https://github.com/etcd-io/etcd/pull/13378).
+
+### Go
+
+- Compile with [*Go 1.12.17*](https://golang.org/doc/devel/release.html#go1.12).
+
+
+ ## v3.4.16 (2021-05-11) See [code changes](https://github.com/etcd-io/etcd/compare/v3.4.15...v3.4.16) and [v3.4 upgrade guide](https://etcd.io/docs/latest/upgrades/upgrade_3_4/) for any breaking changes. From 168b074383f81463a8b1fcd018e12f979fa67712 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sun, 3 Oct 2021 23:43:51 -0400 Subject: [PATCH 40/43] CHANGELOG: 3.3.26 Signed-off-by: Sam Batschelet --- CHANGELOG-3.3.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG-3.3.md b/CHANGELOG-3.3.md index 1f914fad7..b265d8322 100644 --- a/CHANGELOG-3.3.md +++ b/CHANGELOG-3.3.md @@ -8,8 +8,25 @@ The minimum recommended etcd versions to run in **production** are 3.2.28+, 3.3.
+## v3.3.26 (2021-10-03) -## v3.3.25 (2020 TBD) +See [code changes](https://github.com/etcd-io/etcd/compare/v3.3.25...v3.3.26) and [v3.3 upgrade guide](https://etcd.io/docs/latest/upgrades/upgrade_3_3/) for any breaking changes. + +### Package `clientv3` + +- Fix [auth token invalid after watch reconnects](https://github.com/etcd-io/etcd/pull/12264). Get AuthToken automatically when clientConn is ready. + +### Package `fileutil` + +- Fix [constant](https://github.com/etcd-io/etcd/pull/12440) for linux locking. + +### Go + +- Compile with [*Go 1.12.17*](https://golang.org/doc/devel/release.html#go1.12). + +
+ +## v3.3.25 (2020-08-24) See [code changes](https://github.com/etcd-io/etcd/compare/v3.3.23...v3.3.25) and [v3.3 upgrade guide](https://etcd.io/docs/latest/upgrades/upgrade_3_3/) for any breaking changes. From 205720c69688f68d6cee0a2d1a56f49e954f56c6 Mon Sep 17 00:00:00 2001 From: Geeta Gharpure Date: Mon, 4 Oct 2021 12:52:20 -0700 Subject: [PATCH 41/43] CHANGELOG-3.6 Add etcd_disk_defrag_inflight to indicate if defrag is active --- CHANGELOG-3.5.md | 1 - CHANGELOG-3.6.md | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index 05b5e9a93..974c1bde6 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -122,7 +122,6 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738). - Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3). - Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214). -- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371). ### etcd server diff --git a/CHANGELOG-3.6.md b/CHANGELOG-3.6.md index 360f25f49..dac73f7a4 100644 --- a/CHANGELOG-3.6.md +++ b/CHANGELOG-3.6.md @@ -28,3 +28,10 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0). - Package `mvcc/buckets` was moved to `storage/schema` - Package `wal` was moved to `storage/wal` - Package `datadir` was moved to `storage/datadir` + + +### Metrics, Monitoring + +See [List of metrics](https://etcd.io/docs/latest/metrics/) for all metrics per release. + +- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371). From 0804aae7545ea741bd3a935f6cc807ce86f2663c Mon Sep 17 00:00:00 2001 From: Geeta Gharpure Date: Wed, 6 Oct 2021 11:09:50 -0700 Subject: [PATCH 42/43] CHANGELOG updating changelogs for etcd_disk_defrag_inflight backport --- CHANGELOG-3.4.md | 1 + CHANGELOG-3.5.md | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG-3.4.md b/CHANGELOG-3.4.md index 551f97b1f..032760adc 100644 --- a/CHANGELOG-3.4.md +++ b/CHANGELOG-3.4.md @@ -163,6 +163,7 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.4.10...v3.4.11) an ### Metrics, Monitoring - Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214). +- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13397). ### Go diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index e2a52bff0..9845595a0 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -129,6 +129,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738). - Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3). - Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214). +- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13395). ### etcd server From 3644c9d67ba0316c1c8f7c7d96316bf739234381 Mon Sep 17 00:00:00 2001 From: Aleksandr Razumov Date: Thu, 7 Oct 2021 17:49:13 +0300 Subject: [PATCH 43/43] client/pkg/fileutil: add missing logger to {Create,Touch}DirAll Also populate it to every invocation. 
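For illustration, a minimal sketch of how call sites look with the new
signatures (the logger setup and directory paths below are hypothetical,
not taken from this patch):

    package main

    import (
        "go.etcd.io/etcd/client/pkg/v3/fileutil"
        "go.uber.org/zap"
    )

    func main() {
        // The caller now supplies the logger instead of TouchDirAll
        // constructing one internally.
        lg := zap.NewExample()

        // TouchDirAll creates the directory with 0700 if it is missing and
        // reports permission problems through lg.
        if err := fileutil.TouchDirAll(lg, "/tmp/etcd-data"); err != nil {
            lg.Fatal("cannot create data directory", zap.Error(err))
        }

        // CreateDirAll additionally requires the deepest directory to be empty.
        if err := fileutil.CreateDirAll(lg, "/tmp/etcd-data/member"); err != nil {
            lg.Fatal("cannot create member directory", zap.Error(err))
        }
    }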
--- client/pkg/fileutil/fileutil.go | 10 +++------- client/pkg/fileutil/fileutil_test.go | 6 +++--- client/pkg/transport/listener.go | 2 +- etcdutl/etcdutl/backup_command.go | 2 +- etcdutl/snapshot/v3_snapshot.go | 4 ++-- server/etcdmain/etcd.go | 2 +- server/etcdserver/bootstrap.go | 6 +++--- server/storage/wal/wal.go | 2 +- tests/functional/agent/handler.go | 7 ++++--- tests/functional/agent/utils.go | 6 ++++-- tests/functional/tester/cluster.go | 2 +- tests/functional/tester/cluster_run.go | 2 +- 12 files changed, 25 insertions(+), 26 deletions(-) diff --git a/client/pkg/fileutil/fileutil.go b/client/pkg/fileutil/fileutil.go index e442c3c92..d31ece3e2 100644 --- a/client/pkg/fileutil/fileutil.go +++ b/client/pkg/fileutil/fileutil.go @@ -44,16 +44,12 @@ func IsDirWriteable(dir string) error { // TouchDirAll is similar to os.MkdirAll. It creates directories with 0700 permission if any directory // does not exists. TouchDirAll also ensures the given directory is writable. -func TouchDirAll(dir string) error { +func TouchDirAll(lg *zap.Logger, dir string) error { // If path is already a directory, MkdirAll does nothing and returns nil, so, // first check if dir exist with an expected permission mode. if Exist(dir) { err := CheckDirPermission(dir, PrivateDirMode) if err != nil { - lg, _ := zap.NewProduction() - if lg == nil { - lg = zap.NewExample() - } lg.Warn("check file permission", zap.Error(err)) } } else { @@ -70,8 +66,8 @@ func TouchDirAll(dir string) error { // CreateDirAll is similar to TouchDirAll but returns error // if the deepest directory was not empty. -func CreateDirAll(dir string) error { - err := TouchDirAll(dir) +func CreateDirAll(lg *zap.Logger, dir string) error { + err := TouchDirAll(lg, dir) if err == nil { var ns []string ns, err = ReadDir(dir) diff --git a/client/pkg/fileutil/fileutil_test.go b/client/pkg/fileutil/fileutil_test.go index 3a761ff9a..180416382 100644 --- a/client/pkg/fileutil/fileutil_test.go +++ b/client/pkg/fileutil/fileutil_test.go @@ -67,7 +67,7 @@ func TestCreateDirAll(t *testing.T) { defer os.RemoveAll(tmpdir) tmpdir2 := filepath.Join(tmpdir, "testdir") - if err = CreateDirAll(tmpdir2); err != nil { + if err = CreateDirAll(zaptest.NewLogger(t), tmpdir2); err != nil { t.Fatal(err) } @@ -75,7 +75,7 @@ func TestCreateDirAll(t *testing.T) { t.Fatal(err) } - if err = CreateDirAll(tmpdir2); err == nil || !strings.Contains(err.Error(), "to be empty, got") { + if err = CreateDirAll(zaptest.NewLogger(t), tmpdir2); err == nil || !strings.Contains(err.Error(), "to be empty, got") { t.Fatalf("unexpected error %v", err) } } @@ -186,7 +186,7 @@ func TestDirPermission(t *testing.T) { tmpdir2 := filepath.Join(tmpdir, "testpermission") // create a new dir with 0700 - if err = CreateDirAll(tmpdir2); err != nil { + if err = CreateDirAll(zaptest.NewLogger(t), tmpdir2); err != nil { t.Fatal(err) } // check dir permission with mode different than created dir diff --git a/client/pkg/transport/listener.go b/client/pkg/transport/listener.go index 992c773ea..2a5ec9a02 100644 --- a/client/pkg/transport/listener.go +++ b/client/pkg/transport/listener.go @@ -192,7 +192,7 @@ func SelfCert(lg *zap.Logger, dirpath string, hosts []string, selfSignedCertVali ) return } - err = fileutil.TouchDirAll(dirpath) + err = fileutil.TouchDirAll(lg, dirpath) if err != nil { if info.Logger != nil { info.Logger.Warn( diff --git a/etcdutl/etcdutl/backup_command.go b/etcdutl/etcdutl/backup_command.go index 54fa68e1d..13e6eb76b 100644 --- a/etcdutl/etcdutl/backup_command.go +++ 
b/etcdutl/etcdutl/backup_command.go @@ -118,7 +118,7 @@ func HandleBackup(withV3 bool, srcDir string, destDir string, srcWAL string, des destWAL = datadir.ToWalDir(destDir) } - if err := fileutil.CreateDirAll(destSnap); err != nil { + if err := fileutil.CreateDirAll(lg, destSnap); err != nil { lg.Fatal("failed creating backup snapshot dir", zap.String("dest-snap", destSnap), zap.Error(err)) } diff --git a/etcdutl/snapshot/v3_snapshot.go b/etcdutl/snapshot/v3_snapshot.go index f6445498c..cf7b4a6eb 100644 --- a/etcdutl/snapshot/v3_snapshot.go +++ b/etcdutl/snapshot/v3_snapshot.go @@ -333,7 +333,7 @@ func (s *v3Manager) copyAndVerifyDB() error { return err } - if err := fileutil.CreateDirAll(s.snapDir); err != nil { + if err := fileutil.CreateDirAll(s.lg, s.snapDir); err != nil { return err } @@ -394,7 +394,7 @@ func (s *v3Manager) copyAndVerifyDB() error { // // TODO: This code ignores learners !!! func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { - if err := fileutil.CreateDirAll(s.walDir); err != nil { + if err := fileutil.CreateDirAll(s.lg, s.walDir); err != nil { return nil, err } diff --git a/server/etcdmain/etcd.go b/server/etcdmain/etcd.go index eb2585699..9df267bda 100644 --- a/server/etcdmain/etcd.go +++ b/server/etcdmain/etcd.go @@ -275,7 +275,7 @@ func startProxy(cfg *config) error { } cfg.ec.Dir = filepath.Join(cfg.ec.Dir, "proxy") - err = fileutil.TouchDirAll(cfg.ec.Dir) + err = fileutil.TouchDirAll(lg, cfg.ec.Dir) if err != nil { return err } diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index f1f74fea9..2cf4dfeb1 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -60,11 +60,11 @@ func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { ) } - if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil { + if terr := fileutil.TouchDirAll(cfg.Logger, cfg.DataDir); terr != nil { return nil, fmt.Errorf("cannot access data directory: %v", terr) } - if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil { + if terr := fileutil.TouchDirAll(cfg.Logger, cfg.MemberDir()); terr != nil { return nil, fmt.Errorf("cannot access member directory: %v", terr) } ss := bootstrapSnapshot(cfg) @@ -179,7 +179,7 @@ func bootstrapStorage(cfg config.ServerConfig, st v2store.Store, be *bootstrappe } func bootstrapSnapshot(cfg config.ServerConfig) *snap.Snapshotter { - if err := fileutil.TouchDirAll(cfg.SnapDir()); err != nil { + if err := fileutil.TouchDirAll(cfg.Logger, cfg.SnapDir()); err != nil { cfg.Logger.Fatal( "failed to create snapshot directory", zap.String("path", cfg.SnapDir()), diff --git a/server/storage/wal/wal.go b/server/storage/wal/wal.go index 83c10d46f..a24c9d406 100644 --- a/server/storage/wal/wal.go +++ b/server/storage/wal/wal.go @@ -116,7 +116,7 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) { } defer os.RemoveAll(tmpdirpath) - if err := fileutil.CreateDirAll(tmpdirpath); err != nil { + if err := fileutil.CreateDirAll(lg, tmpdirpath); err != nil { lg.Warn( "failed to create a temporary WAL directory", zap.String("tmp-dir-path", tmpdirpath), diff --git a/tests/functional/agent/handler.go b/tests/functional/agent/handler.go index 0be6de7cf..b19c12fe6 100644 --- a/tests/functional/agent/handler.go +++ b/tests/functional/agent/handler.go @@ -474,7 +474,7 @@ func (srv *Server) handle_INITIAL_START_ETCD(req *rpcpb.Request) (*rpcpb.Respons }, nil } - err := fileutil.TouchDirAll(srv.Member.BaseDir) + err := fileutil.TouchDirAll(srv.lg, srv.Member.BaseDir) 
if err != nil { return nil, err } @@ -509,7 +509,7 @@ func (srv *Server) handle_INITIAL_START_ETCD(req *rpcpb.Request) (*rpcpb.Respons func (srv *Server) handle_RESTART_ETCD(req *rpcpb.Request) (*rpcpb.Response, error) { var err error if !fileutil.Exist(srv.Member.BaseDir) { - err = fileutil.TouchDirAll(srv.Member.BaseDir) + err = fileutil.TouchDirAll(srv.lg, srv.Member.BaseDir) if err != nil { return nil, err } @@ -580,7 +580,7 @@ func (srv *Server) handle_SIGQUIT_ETCD_AND_REMOVE_DATA() (*rpcpb.Response, error // create a new log file for next new member restart if !fileutil.Exist(srv.Member.BaseDir) { - err = fileutil.TouchDirAll(srv.Member.BaseDir) + err = fileutil.TouchDirAll(srv.lg, srv.Member.BaseDir) if err != nil { return nil, err } @@ -652,6 +652,7 @@ func (srv *Server) handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA() (*rpcpb.Response, erro // TODO: support separate WAL directory if err = archive( + srv.lg, srv.Member.BaseDir, srv.Member.Etcd.LogOutputs[0], srv.Member.Etcd.DataDir, diff --git a/tests/functional/agent/utils.go b/tests/functional/agent/utils.go index 37a2c7adf..2cd888ed0 100644 --- a/tests/functional/agent/utils.go +++ b/tests/functional/agent/utils.go @@ -25,15 +25,17 @@ import ( "time" "go.etcd.io/etcd/client/pkg/v3/fileutil" + + "go.uber.org/zap" ) // TODO: support separate WAL directory -func archive(baseDir, etcdLogPath, dataDir string) error { +func archive(lg *zap.Logger, baseDir, etcdLogPath, dataDir string) error { dir := filepath.Join(baseDir, "etcd-failure-archive", time.Now().Format(time.RFC3339)) if existDir(dir) { dir = filepath.Join(baseDir, "etcd-failure-archive", time.Now().Add(time.Second).Format(time.RFC3339)) } - if err := fileutil.TouchDirAll(dir); err != nil { + if err := fileutil.TouchDirAll(lg, dir); err != nil { return err } diff --git a/tests/functional/tester/cluster.go b/tests/functional/tester/cluster.go index bd49a9198..2ec441d9f 100644 --- a/tests/functional/tester/cluster.go +++ b/tests/functional/tester/cluster.go @@ -524,7 +524,7 @@ func (clus *Cluster) sendOpWithResp(idx int, op rpcpb.Operation) (*rpcpb.Respons "fixtures", "client", ) - if err = fileutil.TouchDirAll(dirClient); err != nil { + if err = fileutil.TouchDirAll(clus.lg, dirClient); err != nil { return nil, err } diff --git a/tests/functional/tester/cluster_run.go b/tests/functional/tester/cluster_run.go index 6cab58331..dfed66878 100644 --- a/tests/functional/tester/cluster_run.go +++ b/tests/functional/tester/cluster_run.go @@ -38,7 +38,7 @@ func (clus *Cluster) Run(t *testing.T) { // needs to obtain all the failpoints from the etcd member. clus.updateCases() - if err := fileutil.TouchDirAll(clus.Tester.DataDir); err != nil { + if err := fileutil.TouchDirAll(clus.lg, clus.Tester.DataDir); err != nil { clus.lg.Panic( "failed to create test data directory", zap.String("dir", clus.Tester.DataDir),