mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: adjust StrictReconfigCheck
Adjust StrictReconfigCheck logic to accommodate learner members in the cluster.
This commit is contained in:
parent
bdcecd1fc4
commit
76a63f9f7d
@ -123,6 +123,19 @@ func (c *RaftCluster) Member(id types.ID) *Member {
|
||||
return c.members[id].Clone()
|
||||
}
|
||||
|
||||
func (c *RaftCluster) VotingMembers() []*Member {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
var ms MembersByID
|
||||
for _, m := range c.members {
|
||||
if !m.IsLearner {
|
||||
ms = append(ms, m.Clone())
|
||||
}
|
||||
}
|
||||
sort.Sort(ms)
|
||||
return []*Member(ms)
|
||||
}
|
||||
|
||||
// MemberByName returns a Member with the given name if exists.
|
||||
// If more than one member has the given name, it will panic.
|
||||
func (c *RaftCluster) MemberByName(name string) *Member {
|
||||
@ -551,11 +564,11 @@ func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *s
|
||||
onSet(c.lg, ver)
|
||||
}
|
||||
|
||||
func (c *RaftCluster) IsReadyToAddNewMember() bool {
|
||||
func (c *RaftCluster) IsReadyToAddVotingMember() bool {
|
||||
nmembers := 1
|
||||
nstarted := 0
|
||||
|
||||
for _, member := range c.members {
|
||||
for _, member := range c.VotingMembers() {
|
||||
if member.IsStarted() {
|
||||
nstarted++
|
||||
}
|
||||
@ -592,11 +605,11 @@ func (c *RaftCluster) IsReadyToAddNewMember() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
|
||||
func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
|
||||
nmembers := 0
|
||||
nstarted := 0
|
||||
|
||||
for _, member := range c.members {
|
||||
for _, member := range c.VotingMembers() {
|
||||
if uint64(member.ID) == id {
|
||||
continue
|
||||
}
|
||||
|
@ -626,7 +626,7 @@ func newTestCluster(membs []*Member) *RaftCluster {
|
||||
|
||||
func stringp(s string) *string { return &s }
|
||||
|
||||
func TestIsReadyToAddNewMember(t *testing.T) {
|
||||
func TestIsReadyToAddVotingMember(t *testing.T) {
|
||||
tests := []struct {
|
||||
members []*Member
|
||||
want bool
|
||||
@ -697,16 +697,38 @@ func TestIsReadyToAddNewMember(t *testing.T) {
|
||||
[]*Member{},
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 2 voting members ready in cluster with 2 voting members and 2 unstarted learner member, should succeed
|
||||
// (the status of learner members does not affect the readiness of adding voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "2", nil),
|
||||
newTestMemberAsLearner(3, nil, "", nil),
|
||||
newTestMemberAsLearner(4, nil, "", nil),
|
||||
},
|
||||
true,
|
||||
},
|
||||
{
|
||||
// 1 voting member ready in cluster with 2 voting members and 2 ready learner member, should fail
|
||||
// (the status of learner members does not affect the readiness of adding voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMemberAsLearner(3, nil, "3", nil),
|
||||
newTestMemberAsLearner(4, nil, "4", nil),
|
||||
},
|
||||
false,
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
c := newTestCluster(tt.members)
|
||||
if got := c.IsReadyToAddNewMember(); got != tt.want {
|
||||
if got := c.IsReadyToAddVotingMember(); got != tt.want {
|
||||
t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsReadyToRemoveMember(t *testing.T) {
|
||||
func TestIsReadyToRemoveVotingMember(t *testing.T) {
|
||||
tests := []struct {
|
||||
members []*Member
|
||||
removeID uint64
|
||||
@ -782,10 +804,57 @@ func TestIsReadyToRemoveMember(t *testing.T) {
|
||||
4,
|
||||
true,
|
||||
},
|
||||
{
|
||||
// 1 voting members ready in cluster with 1 voting member and 1 ready learner,
|
||||
// removing voting member should fail
|
||||
// (the status of learner members does not affect the readiness of removing voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMemberAsLearner(2, nil, "2", nil),
|
||||
},
|
||||
1,
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 1 voting members ready in cluster with 2 voting member and 1 ready learner,
|
||||
// removing ready voting member should fail
|
||||
// (the status of learner members does not affect the readiness of removing voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMemberAsLearner(3, nil, "3", nil),
|
||||
},
|
||||
1,
|
||||
false,
|
||||
},
|
||||
{
|
||||
// 1 voting members ready in cluster with 2 voting member and 1 ready learner,
|
||||
// removing unstarted voting member should be fine. (Actual operation will fail)
|
||||
// (the status of learner members does not affect the readiness of removing voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMemberAsLearner(3, nil, "3", nil),
|
||||
},
|
||||
2,
|
||||
true,
|
||||
},
|
||||
{
|
||||
// 1 voting members ready in cluster with 2 voting member and 1 unstarted learner,
|
||||
// removing not-ready voting member should be fine. (Actual operation will fail)
|
||||
// (the status of learner members does not affect the readiness of removing voting member)
|
||||
[]*Member{
|
||||
newTestMember(1, nil, "1", nil),
|
||||
newTestMember(2, nil, "", nil),
|
||||
newTestMemberAsLearner(3, nil, "", nil),
|
||||
},
|
||||
2,
|
||||
true,
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
c := newTestCluster(tt.members)
|
||||
if got := c.IsReadyToRemoveMember(tt.removeID); got != tt.want {
|
||||
if got := c.IsReadyToRemoveVotingMember(tt.removeID); got != tt.want {
|
||||
t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
|
@ -1553,43 +1553,17 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) ([]*
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO: might switch to less strict check when adding raft learner
|
||||
if s.Cfg.StrictReconfigCheck {
|
||||
// by default StrictReconfigCheck is enabled; reject new members if unhealthy
|
||||
if !s.cluster.IsReadyToAddNewMember() {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member add request; not enough healthy members",
|
||||
zap.String("local-member-id", s.ID().String()),
|
||||
zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
|
||||
zap.Error(ErrNotEnoughStartedMembers),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("not enough started members, rejecting member add %+v", memb)
|
||||
}
|
||||
return nil, ErrNotEnoughStartedMembers
|
||||
}
|
||||
|
||||
if !isConnectedFullySince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), s.cluster.Members()) {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member add request; local member has not been connected to all peers, reconfigure breaks active quorum",
|
||||
zap.String("local-member-id", s.ID().String()),
|
||||
zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
|
||||
zap.Error(ErrUnhealthy),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("not healthy for reconfigure, rejecting member add %+v", memb)
|
||||
}
|
||||
return nil, ErrUnhealthy
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: move Member to protobuf type
|
||||
b, err := json.Marshal(memb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// by default StrictReconfigCheck is enabled; reject new members if unhealthy.
|
||||
if err := s.mayAddMember(memb); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cc := raftpb.ConfChange{
|
||||
Type: raftpb.ConfChangeAddNode,
|
||||
NodeID: uint64(memb.ID),
|
||||
@ -1603,6 +1577,43 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) ([]*
|
||||
return s.configure(ctx, cc)
|
||||
}
|
||||
|
||||
func (s *EtcdServer) mayAddMember(memb membership.Member) error {
|
||||
if !s.Cfg.StrictReconfigCheck {
|
||||
return nil
|
||||
}
|
||||
|
||||
// protect quorum when adding voting member
|
||||
if !memb.IsLearner && !s.cluster.IsReadyToAddVotingMember() {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member add request; not enough healthy members",
|
||||
zap.String("local-member-id", s.ID().String()),
|
||||
zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
|
||||
zap.Error(ErrNotEnoughStartedMembers),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("not enough started members, rejecting member add %+v", memb)
|
||||
}
|
||||
return ErrNotEnoughStartedMembers
|
||||
}
|
||||
|
||||
if !isConnectedFullySince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), s.cluster.VotingMembers()) {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member add request; local member has not been connected to all peers, reconfigure breaks active quorum",
|
||||
zap.String("local-member-id", s.ID().String()),
|
||||
zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
|
||||
zap.Error(ErrUnhealthy),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("not healthy for reconfigure, rejecting member add %+v", memb)
|
||||
}
|
||||
return ErrUnhealthy
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
|
||||
if err := s.checkMembershipOperationPermission(ctx); err != nil {
|
||||
return nil, err
|
||||
@ -1667,7 +1678,13 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !s.cluster.IsReadyToRemoveMember(uint64(id)) {
|
||||
isLearner := s.cluster.IsMemberExist(id) && s.cluster.Member(id).IsLearner
|
||||
// no need to check quorum when removing non-voting member
|
||||
if isLearner {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !s.cluster.IsReadyToRemoveVotingMember(uint64(id)) {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member remove request; not enough healthy members",
|
||||
@ -1687,7 +1704,7 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
||||
}
|
||||
|
||||
// protect quorum if some members are down
|
||||
m := s.cluster.Members()
|
||||
m := s.cluster.VotingMembers()
|
||||
active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), m)
|
||||
if (active - 1) < 1+((len(m)-1)/2) {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
|
Loading…
x
Reference in New Issue
Block a user