mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #3479 from mitake/membership
etcdserver: avoid deadlock caused by adding members with wrong peer URLs
This commit is contained in:
commit
0ca800fbac
@ -95,6 +95,7 @@ type config struct {
|
||||
fallback *flags.StringsFlag
|
||||
initialCluster string
|
||||
initialClusterToken string
|
||||
strictReconfigCheck bool
|
||||
|
||||
// proxy
|
||||
proxy *flags.StringsFlag
|
||||
@ -177,6 +178,7 @@ func NewConfig() *config {
|
||||
// Should never happen.
|
||||
plog.Panicf("unexpected error setting up clusterStateFlag: %v", err)
|
||||
}
|
||||
fs.BoolVar(&cfg.strictReconfigCheck, "strict-reconfig-check", false, "Reject reconfiguration that might cause quorum loss")
|
||||
|
||||
// proxy
|
||||
fs.Var(cfg.proxy, "proxy", fmt.Sprintf("Valid values include %s", strings.Join(cfg.proxy.Values, ", ")))
|
||||
|
@ -265,6 +265,7 @@ func startEtcd(cfg *config) (<-chan struct{}, error) {
|
||||
TickMs: cfg.TickMs,
|
||||
ElectionTicks: cfg.electionTicks(),
|
||||
V3demo: cfg.v3demo,
|
||||
StrictReconfigCheck: cfg.strictReconfigCheck,
|
||||
}
|
||||
var s *etcdserver.EtcdServer
|
||||
s, err = etcdserver.NewServer(srvcfg)
|
||||
|
@ -368,6 +368,34 @@ func (c *cluster) SetVersion(ver *semver.Version) {
|
||||
MustDetectDowngrade(c.version)
|
||||
}
|
||||
|
||||
func (c *cluster) isReadyToAddNewMember() bool {
|
||||
nmembers := 1
|
||||
nstarted := 0
|
||||
|
||||
for _, member := range c.members {
|
||||
if member.IsStarted() {
|
||||
nstarted++
|
||||
}
|
||||
nmembers++
|
||||
}
|
||||
|
||||
if nstarted == 1 && nmembers == 2 {
|
||||
// a case of adding a new node to 1-member cluster for restoring cluster data
|
||||
// https://github.com/coreos/etcd/blob/master/Documentation/admin_guide.md#restoring-the-cluster
|
||||
|
||||
plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
|
||||
return true
|
||||
}
|
||||
|
||||
nquorum := nmembers/2 + 1
|
||||
if nstarted < nquorum {
|
||||
plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
||||
members := make(map[types.ID]*Member)
|
||||
removed := make(map[types.ID]bool)
|
||||
|
@ -566,3 +566,83 @@ func newTestCluster(membs []*Member) *cluster {
|
||||
}
|
||||
|
||||
func stringp(s string) *string { return &s }
|
||||
|
||||
func TestIsReadyToAddNewMember(t *testing.T) {
|
||||
tests := []struct {
|
||||
members []*Member
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
// 0/3 members ready, should fail
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMember(3, nil, "", nil),
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 1/2 members ready, should fail
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 1/3 members ready, should fail
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMember(3, nil, "", nil),
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 1/1 members ready, should succeed (special case of 1-member cluster for recovery)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// 2/3 members ready, should fail
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "2", nil),
|
||||
newTestMember(3, nil, "", nil),
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 3/3 members ready, should be fine to add one member and retain quorum
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "2", nil),
|
||||
newTestMember(3, nil, "3", nil),
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// 3/4 members ready, should be fine to add one member and retain quorum
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "2", nil),
|
||||
newTestMember(3, nil, "3", nil),
|
||||
newTestMember(4, nil, "", nil),
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// empty cluster, it is impossible but should fail
|
||||
[]*Member{},
|
||||
false,
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
c := newTestCluster(tt.members)
|
||||
if got := c.isReadyToAddNewMember(); got != tt.want {
|
||||
t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,6 +49,8 @@ type ServerConfig struct {
|
||||
ElectionTicks int
|
||||
|
||||
V3demo bool
|
||||
|
||||
StrictReconfigCheck bool
|
||||
}
|
||||
|
||||
// VerifyBootstrapConfig sanity-checks the initial config for bootstrap case
|
||||
|
@ -31,6 +31,7 @@ var (
|
||||
ErrTimeout = errors.New("etcdserver: request timed out")
|
||||
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
|
||||
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
|
||||
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
|
||||
)
|
||||
|
||||
func isKeyNotFound(err error) bool {
|
||||
|
@ -105,6 +105,10 @@ func (m *Member) Clone() *Member {
|
||||
return mm
|
||||
}
|
||||
|
||||
func (m *Member) IsStarted() bool {
|
||||
return len(m.Name) != 0
|
||||
}
|
||||
|
||||
func memberStoreKey(id types.ID) string {
|
||||
return path.Join(storeMembersPrefix, id.String())
|
||||
}
|
||||
|
@ -603,6 +603,12 @@ func (s *EtcdServer) LeaderStats() []byte {
|
||||
func (s *EtcdServer) StoreStats() []byte { return s.store.JsonStats() }
|
||||
|
||||
func (s *EtcdServer) AddMember(ctx context.Context, memb Member) error {
|
||||
if s.cfg.StrictReconfigCheck && !s.cluster.isReadyToAddNewMember() {
|
||||
// If s.cfg.StrictReconfigCheck is false, it means the option -strict-reconfig-check isn't passed to etcd.
|
||||
// In such a case adding a new member is allowed unconditionally
|
||||
return ErrNotEnoughStartedMembers
|
||||
}
|
||||
|
||||
// TODO: move Member to protobuf type
|
||||
b, err := json.Marshal(memb)
|
||||
if err != nil {
|
||||
|
@ -866,6 +866,7 @@ func TestAddMember(t *testing.T) {
|
||||
store: st,
|
||||
cluster: cl,
|
||||
reqIDGen: idutil.NewGenerator(0, time.Time{}),
|
||||
cfg: &ServerConfig{},
|
||||
}
|
||||
s.start()
|
||||
m := Member{ID: 1234, RaftAttributes: RaftAttributes{PeerURLs: []string{"foo"}}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user