From 6974fc63ed8754537252289d6d5fcc64b45b5ac1 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 10 Sep 2015 15:45:34 +0900 Subject: [PATCH 1/3] etcdserver: avoid deadlock caused by adding members with wrong peer URLs Current membership changing functionality of etcd seems to have a problem which can cause deadlock. How to produce: 1. construct N node cluster 2. add N new nodes with etcdctl member add, without starting the new members What happens: After finishing add N nodes, a total number of the cluster becomes 2 * N and a quorum number of the cluster becomes N + 1. It means membership change requires at least N + 1 nodes because Raft treats membership information in its log like other ordinal log append requests. Assume the peer URLs of the added nodes are wrong because of miss operation or bugs in wrapping program which launch etcd. In such a case, both of adding and removing members are impossible because the quorum isn't preserved. Of course ordinal requests cannot be served. The cluster would seem to be deadlock. Of course, the best practice of adding new nodes is adding one node and let the node start one by one. However, the effect of this problem is so serious. I think preventing the problem forcibly would be valuable. Solution: This patch lets etcd forbid adding a new node if the operation changes quorum and the number of changed quorum is larger than a number of running nodes. If etcd is launched with a newly added option -strict-reconfig-check, the checking logic is activated. If the option isn't passed, default behavior of reconfig is kept. Fixes https://github.com/coreos/etcd/issues/3477 --- etcdmain/config.go | 2 ++ etcdmain/etcd.go | 1 + etcdserver/cluster.go | 28 ++++++++++++++++++++++++++++ etcdserver/config.go | 2 ++ etcdserver/errors.go | 1 + etcdserver/member.go | 4 ++++ etcdserver/server.go | 6 ++++++ etcdserver/server_test.go | 1 + 8 files changed, 45 insertions(+) diff --git a/etcdmain/config.go b/etcdmain/config.go index c8e305643..54c2e2109 100644 --- a/etcdmain/config.go +++ b/etcdmain/config.go @@ -95,6 +95,7 @@ type config struct { fallback *flags.StringsFlag initialCluster string initialClusterToken string + strictReconfigCheck bool // proxy proxy *flags.StringsFlag @@ -177,6 +178,7 @@ func NewConfig() *config { // Should never happen. plog.Panicf("unexpected error setting up clusterStateFlag: %v", err) } + fs.BoolVar(&cfg.strictReconfigCheck, "strict-reconfig-check", false, "Reject reconfiguration that might cause quorum loss") // proxy fs.Var(cfg.proxy, "proxy", fmt.Sprintf("Valid values include %s", strings.Join(cfg.proxy.Values, ", "))) diff --git a/etcdmain/etcd.go b/etcdmain/etcd.go index e4cb26a8d..30aa717d8 100644 --- a/etcdmain/etcd.go +++ b/etcdmain/etcd.go @@ -265,6 +265,7 @@ func startEtcd(cfg *config) (<-chan struct{}, error) { TickMs: cfg.TickMs, ElectionTicks: cfg.electionTicks(), V3demo: cfg.v3demo, + StrictReconfigCheck: cfg.strictReconfigCheck, } var s *etcdserver.EtcdServer s, err = etcdserver.NewServer(srvcfg) diff --git a/etcdserver/cluster.go b/etcdserver/cluster.go index 087b8122b..a14673d1e 100644 --- a/etcdserver/cluster.go +++ b/etcdserver/cluster.go @@ -368,6 +368,34 @@ func (c *cluster) SetVersion(ver *semver.Version) { MustDetectDowngrade(c.version) } +func (c *cluster) isReadyToAddNewMember() bool { + nmembers := 1 + nstarted := 0 + + for _, member := range c.members { + if member.IsStarted() { + nstarted++ + } + nmembers++ + } + + if nstarted == 1 { + // a case of adding a new node to 1-member cluster for restoring cluster data + // https://github.com/coreos/etcd/blob/master/Documentation/admin_guide.md#restoring-the-cluster + + plog.Debugf("The number of started member is 1. This cluster can accept add member request.") + return true + } + + nquorum := nmembers/2 + 1 + if nstarted < nquorum { + plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum) + return false + } + + return true +} + func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) { members := make(map[types.ID]*Member) removed := make(map[types.ID]bool) diff --git a/etcdserver/config.go b/etcdserver/config.go index 9ed6b6d66..decb5c1d0 100644 --- a/etcdserver/config.go +++ b/etcdserver/config.go @@ -49,6 +49,8 @@ type ServerConfig struct { ElectionTicks int V3demo bool + + StrictReconfigCheck bool } // VerifyBootstrapConfig sanity-checks the initial config for bootstrap case diff --git a/etcdserver/errors.go b/etcdserver/errors.go index 22b443aba..fc380db35 100644 --- a/etcdserver/errors.go +++ b/etcdserver/errors.go @@ -31,6 +31,7 @@ var ( ErrTimeout = errors.New("etcdserver: request timed out") ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure") ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost") + ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members") ) func isKeyNotFound(err error) bool { diff --git a/etcdserver/member.go b/etcdserver/member.go index 2d1c841e8..6e9fe6bc3 100644 --- a/etcdserver/member.go +++ b/etcdserver/member.go @@ -105,6 +105,10 @@ func (m *Member) Clone() *Member { return mm } +func (m *Member) IsStarted() bool { + return len(m.Name) != 0 +} + func memberStoreKey(id types.ID) string { return path.Join(storeMembersPrefix, id.String()) } diff --git a/etcdserver/server.go b/etcdserver/server.go index 61d4c8aae..ff093f60c 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -603,6 +603,12 @@ func (s *EtcdServer) LeaderStats() []byte { func (s *EtcdServer) StoreStats() []byte { return s.store.JsonStats() } func (s *EtcdServer) AddMember(ctx context.Context, memb Member) error { + if s.cfg.StrictReconfigCheck && !s.cluster.isReadyToAddNewMember() { + // If s.cfg.StrictReconfigCheck is false, it means the option -strict-reconfig-check isn't passed to etcd. + // In such a case adding a new member is allowed unconditionally + return ErrNotEnoughStartedMembers + } + // TODO: move Member to protobuf type b, err := json.Marshal(memb) if err != nil { diff --git a/etcdserver/server_test.go b/etcdserver/server_test.go index f88a7c8c8..3cd801c63 100644 --- a/etcdserver/server_test.go +++ b/etcdserver/server_test.go @@ -866,6 +866,7 @@ func TestAddMember(t *testing.T) { store: st, cluster: cl, reqIDGen: idutil.NewGenerator(0, time.Time{}), + cfg: &ServerConfig{}, } s.start() m := Member{ID: 1234, RaftAttributes: RaftAttributes{PeerURLs: []string{"foo"}}} From d9cf752060f03cd07a3fe688255ed15f078eeb46 Mon Sep 17 00:00:00 2001 From: Jonathan Boulle Date: Sat, 12 Sep 2015 18:11:23 -0700 Subject: [PATCH 2/3] etcdserver: add test for isReadyToAddNewMember Also fixed check for special case of one-member cluster --- etcdserver/cluster.go | 2 +- etcdserver/cluster_test.go | 65 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/etcdserver/cluster.go b/etcdserver/cluster.go index a14673d1e..8cf90b535 100644 --- a/etcdserver/cluster.go +++ b/etcdserver/cluster.go @@ -379,7 +379,7 @@ func (c *cluster) isReadyToAddNewMember() bool { nmembers++ } - if nstarted == 1 { + if nstarted == 1 && nmembers == 2 { // a case of adding a new node to 1-member cluster for restoring cluster data // https://github.com/coreos/etcd/blob/master/Documentation/admin_guide.md#restoring-the-cluster diff --git a/etcdserver/cluster_test.go b/etcdserver/cluster_test.go index fa571e10d..cecb736c0 100644 --- a/etcdserver/cluster_test.go +++ b/etcdserver/cluster_test.go @@ -566,3 +566,68 @@ func newTestCluster(membs []*Member) *cluster { } func stringp(s string) *string { return &s } + +func TestIsReadyToAddNewMember(t *testing.T) { + tests := []struct { + members []*Member + want bool + }{ + { + // 0/3 members ready, should fail + []*Member{ + newTestMember(1, nil, "", nil), + newTestMember(2, nil, "", nil), + newTestMember(3, nil, "", nil), + }, + false, + }, + { + // 1/2 members ready, should fail + []*Member{ + newTestMember(1, nil, "1", nil), + newTestMember(2, nil, "", nil), + }, + false, + }, + { + // 1/3 members ready, should fail + []*Member{ + newTestMember(1, nil, "1", nil), + newTestMember(2, nil, "", nil), + newTestMember(3, nil, "", nil), + }, + false, + }, + { + // 1/1 members ready, should succeed (special case of 1-member cluster for recovery) + []*Member{ + newTestMember(1, nil, "1", nil), + }, + true, + }, + { + // 2/3 members ready, should fail + []*Member{ + newTestMember(1, nil, "1", nil), + newTestMember(2, nil, "2", nil), + newTestMember(3, nil, "", nil), + }, + false, + }, + { + // 3/3 members ready, should be fine to add one member and retain quorum + []*Member{ + newTestMember(1, nil, "1", nil), + newTestMember(2, nil, "2", nil), + newTestMember(3, nil, "3", nil), + }, + true, + }, + } + for i, tt := range tests { + c := newTestCluster(tt.members) + if got := c.isReadyToAddNewMember(); got != tt.want { + t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want) + } + } +} From dad32646eb1918c4208c8ac4807cd9e0a2ebe7b7 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sun, 13 Sep 2015 11:50:58 +0900 Subject: [PATCH 3/3] etcdserver: enhance test cases for isReadyToAddNewMember - a case of a cluster with even number members - a case of an empty cluster --- etcdserver/cluster_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/etcdserver/cluster_test.go b/etcdserver/cluster_test.go index cecb736c0..80faf4f1b 100644 --- a/etcdserver/cluster_test.go +++ b/etcdserver/cluster_test.go @@ -623,6 +623,21 @@ func TestIsReadyToAddNewMember(t *testing.T) { }, true, }, + { + // 3/4 members ready, should be fine to add one member and retain quorum + []*Member{ + newTestMember(1, nil, "1", nil), + newTestMember(2, nil, "2", nil), + newTestMember(3, nil, "3", nil), + newTestMember(4, nil, "", nil), + }, + true, + }, + { + // empty cluster, it is impossible but should fail + []*Member{}, + false, + }, } for i, tt := range tests { c := newTestCluster(tt.members)