mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #3479 from mitake/membership
etcdserver: avoid deadlock caused by adding members with wrong peer URLs
This commit is contained in:
commit
0ca800fbac
@ -95,6 +95,7 @@ type config struct {
|
|||||||
fallback *flags.StringsFlag
|
fallback *flags.StringsFlag
|
||||||
initialCluster string
|
initialCluster string
|
||||||
initialClusterToken string
|
initialClusterToken string
|
||||||
|
strictReconfigCheck bool
|
||||||
|
|
||||||
// proxy
|
// proxy
|
||||||
proxy *flags.StringsFlag
|
proxy *flags.StringsFlag
|
||||||
@ -177,6 +178,7 @@ func NewConfig() *config {
|
|||||||
// Should never happen.
|
// Should never happen.
|
||||||
plog.Panicf("unexpected error setting up clusterStateFlag: %v", err)
|
plog.Panicf("unexpected error setting up clusterStateFlag: %v", err)
|
||||||
}
|
}
|
||||||
|
fs.BoolVar(&cfg.strictReconfigCheck, "strict-reconfig-check", false, "Reject reconfiguration that might cause quorum loss")
|
||||||
|
|
||||||
// proxy
|
// proxy
|
||||||
fs.Var(cfg.proxy, "proxy", fmt.Sprintf("Valid values include %s", strings.Join(cfg.proxy.Values, ", ")))
|
fs.Var(cfg.proxy, "proxy", fmt.Sprintf("Valid values include %s", strings.Join(cfg.proxy.Values, ", ")))
|
||||||
|
@ -265,6 +265,7 @@ func startEtcd(cfg *config) (<-chan struct{}, error) {
|
|||||||
TickMs: cfg.TickMs,
|
TickMs: cfg.TickMs,
|
||||||
ElectionTicks: cfg.electionTicks(),
|
ElectionTicks: cfg.electionTicks(),
|
||||||
V3demo: cfg.v3demo,
|
V3demo: cfg.v3demo,
|
||||||
|
StrictReconfigCheck: cfg.strictReconfigCheck,
|
||||||
}
|
}
|
||||||
var s *etcdserver.EtcdServer
|
var s *etcdserver.EtcdServer
|
||||||
s, err = etcdserver.NewServer(srvcfg)
|
s, err = etcdserver.NewServer(srvcfg)
|
||||||
|
@ -368,6 +368,34 @@ func (c *cluster) SetVersion(ver *semver.Version) {
|
|||||||
MustDetectDowngrade(c.version)
|
MustDetectDowngrade(c.version)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *cluster) isReadyToAddNewMember() bool {
|
||||||
|
nmembers := 1
|
||||||
|
nstarted := 0
|
||||||
|
|
||||||
|
for _, member := range c.members {
|
||||||
|
if member.IsStarted() {
|
||||||
|
nstarted++
|
||||||
|
}
|
||||||
|
nmembers++
|
||||||
|
}
|
||||||
|
|
||||||
|
if nstarted == 1 && nmembers == 2 {
|
||||||
|
// a case of adding a new node to 1-member cluster for restoring cluster data
|
||||||
|
// https://github.com/coreos/etcd/blob/master/Documentation/admin_guide.md#restoring-the-cluster
|
||||||
|
|
||||||
|
plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
nquorum := nmembers/2 + 1
|
||||||
|
if nstarted < nquorum {
|
||||||
|
plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
||||||
members := make(map[types.ID]*Member)
|
members := make(map[types.ID]*Member)
|
||||||
removed := make(map[types.ID]bool)
|
removed := make(map[types.ID]bool)
|
||||||
|
@ -566,3 +566,83 @@ func newTestCluster(membs []*Member) *cluster {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func stringp(s string) *string { return &s }
|
func stringp(s string) *string { return &s }
|
||||||
|
|
||||||
|
func TestIsReadyToAddNewMember(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
members []*Member
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
// 0/3 members ready, should fail
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "", nil),
|
||||||
|
newTestMember(2, nil, "", nil),
|
||||||
|
newTestMember(3, nil, "", nil),
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 1/2 members ready, should fail
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
newTestMember(2, nil, "", nil),
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 1/3 members ready, should fail
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
newTestMember(2, nil, "", nil),
|
||||||
|
newTestMember(3, nil, "", nil),
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 1/1 members ready, should succeed (special case of 1-member cluster for recovery)
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 2/3 members ready, should fail
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
newTestMember(2, nil, "2", nil),
|
||||||
|
newTestMember(3, nil, "", nil),
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 3/3 members ready, should be fine to add one member and retain quorum
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
newTestMember(2, nil, "2", nil),
|
||||||
|
newTestMember(3, nil, "3", nil),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// 3/4 members ready, should be fine to add one member and retain quorum
|
||||||
|
[]*Member{
|
||||||
|
newTestMember(1, nil, "1", nil),
|
||||||
|
newTestMember(2, nil, "2", nil),
|
||||||
|
newTestMember(3, nil, "3", nil),
|
||||||
|
newTestMember(4, nil, "", nil),
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// empty cluster, it is impossible but should fail
|
||||||
|
[]*Member{},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for i, tt := range tests {
|
||||||
|
c := newTestCluster(tt.members)
|
||||||
|
if got := c.isReadyToAddNewMember(); got != tt.want {
|
||||||
|
t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -49,6 +49,8 @@ type ServerConfig struct {
|
|||||||
ElectionTicks int
|
ElectionTicks int
|
||||||
|
|
||||||
V3demo bool
|
V3demo bool
|
||||||
|
|
||||||
|
StrictReconfigCheck bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// VerifyBootstrapConfig sanity-checks the initial config for bootstrap case
|
// VerifyBootstrapConfig sanity-checks the initial config for bootstrap case
|
||||||
|
@ -31,6 +31,7 @@ var (
|
|||||||
ErrTimeout = errors.New("etcdserver: request timed out")
|
ErrTimeout = errors.New("etcdserver: request timed out")
|
||||||
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
|
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
|
||||||
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
|
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
|
||||||
|
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
|
||||||
)
|
)
|
||||||
|
|
||||||
func isKeyNotFound(err error) bool {
|
func isKeyNotFound(err error) bool {
|
||||||
|
@ -105,6 +105,10 @@ func (m *Member) Clone() *Member {
|
|||||||
return mm
|
return mm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Member) IsStarted() bool {
|
||||||
|
return len(m.Name) != 0
|
||||||
|
}
|
||||||
|
|
||||||
func memberStoreKey(id types.ID) string {
|
func memberStoreKey(id types.ID) string {
|
||||||
return path.Join(storeMembersPrefix, id.String())
|
return path.Join(storeMembersPrefix, id.String())
|
||||||
}
|
}
|
||||||
|
@ -603,6 +603,12 @@ func (s *EtcdServer) LeaderStats() []byte {
|
|||||||
func (s *EtcdServer) StoreStats() []byte { return s.store.JsonStats() }
|
func (s *EtcdServer) StoreStats() []byte { return s.store.JsonStats() }
|
||||||
|
|
||||||
func (s *EtcdServer) AddMember(ctx context.Context, memb Member) error {
|
func (s *EtcdServer) AddMember(ctx context.Context, memb Member) error {
|
||||||
|
if s.cfg.StrictReconfigCheck && !s.cluster.isReadyToAddNewMember() {
|
||||||
|
// If s.cfg.StrictReconfigCheck is false, it means the option -strict-reconfig-check isn't passed to etcd.
|
||||||
|
// In such a case adding a new member is allowed unconditionally
|
||||||
|
return ErrNotEnoughStartedMembers
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: move Member to protobuf type
|
// TODO: move Member to protobuf type
|
||||||
b, err := json.Marshal(memb)
|
b, err := json.Marshal(memb)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -866,6 +866,7 @@ func TestAddMember(t *testing.T) {
|
|||||||
store: st,
|
store: st,
|
||||||
cluster: cl,
|
cluster: cl,
|
||||||
reqIDGen: idutil.NewGenerator(0, time.Time{}),
|
reqIDGen: idutil.NewGenerator(0, time.Time{}),
|
||||||
|
cfg: &ServerConfig{},
|
||||||
}
|
}
|
||||||
s.start()
|
s.start()
|
||||||
m := Member{ID: 1234, RaftAttributes: RaftAttributes{PeerURLs: []string{"foo"}}}
|
m := Member{ID: 1234, RaftAttributes: RaftAttributes{PeerURLs: []string{"foo"}}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user