mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: add mayPromote check
This commit is contained in:
parent
cca8b0d44f
commit
dfe296ac3c
@ -214,14 +214,13 @@ func TestMemberAddForLearner(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemberPromoteForLearner(t *testing.T) {
|
||||
// TODO test not ready learner promotion.
|
||||
func TestMemberPromoteForNotReadyLearner(t *testing.T) {
|
||||
defer testutil.AfterTest(t)
|
||||
|
||||
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
|
||||
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
|
||||
defer clus.Terminate(t)
|
||||
// TODO change the random client to client that talk to leader directly.
|
||||
capi := clus.RandClient()
|
||||
// first client is talked to leader because cluster size is 1
|
||||
capi := clus.Client(0)
|
||||
|
||||
urls := []string{"http://127.0.0.1:1234"}
|
||||
memberAddResp, err := capi.MemberAddAsLearner(context.Background(), urls)
|
||||
@ -244,19 +243,14 @@ func TestMemberPromoteForLearner(t *testing.T) {
|
||||
t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
|
||||
}
|
||||
|
||||
memberPromoteResp, err := capi.MemberPromote(context.Background(), learnerID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to promote member: %v", err)
|
||||
// since we do not start learner, learner must be not ready.
|
||||
_, err = capi.MemberPromote(context.Background(), learnerID)
|
||||
expectedErrKeywords := "can only promote a learner member which catches up with leader"
|
||||
if err == nil {
|
||||
t.Fatalf("expecting promote not ready learner to fail, got no error")
|
||||
}
|
||||
|
||||
numberOfLearners = 0
|
||||
for _, m := range memberPromoteResp.Members {
|
||||
if m.IsLearner {
|
||||
numberOfLearners++
|
||||
}
|
||||
}
|
||||
if numberOfLearners != 0 {
|
||||
t.Errorf("learner promoted, expect 0 learner, got %d", numberOfLearners)
|
||||
if !strings.Contains(err.Error(), expectedErrKeywords) {
|
||||
t.Errorf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -652,6 +652,36 @@ func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
|
||||
nmembers := 1
|
||||
nstarted := 0
|
||||
|
||||
for _, member := range c.VotingMembers() {
|
||||
if member.IsStarted() {
|
||||
nstarted++
|
||||
}
|
||||
nmembers++
|
||||
}
|
||||
|
||||
nquorum := nmembers/2 + 1
|
||||
if nstarted < nquorum {
|
||||
if c.lg != nil {
|
||||
c.lg.Warn(
|
||||
"rejecting member promote; started member will be less than quorum",
|
||||
zap.Int("number-of-started-member", nstarted),
|
||||
zap.Int("quorum", nquorum),
|
||||
zap.String("cluster-id", c.cid.String()),
|
||||
zap.String("local-member-id", c.localID.String()),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("Reject promote member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
||||
members := make(map[types.ID]*Member)
|
||||
removed := make(map[types.ID]bool)
|
||||
|
@ -39,6 +39,7 @@ var toGRPCErrorMap = map[error]error{
|
||||
membership.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
|
||||
membership.ErrTooManyLearners: rpctypes.ErrGRPCTooManyLearners,
|
||||
etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
|
||||
etcdserver.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
|
||||
|
||||
mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
|
||||
mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
|
||||
|
@ -29,6 +29,7 @@ var (
|
||||
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
|
||||
ErrLeaderChanged = errors.New("etcdserver: leader changed")
|
||||
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
|
||||
ErrLearnerNotReady = errors.New("etcdserver: can only promote a learner member which catches up with leader")
|
||||
ErrNoLeader = errors.New("etcdserver: no leader")
|
||||
ErrNotLeader = errors.New("etcdserver: not leader")
|
||||
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
|
||||
|
@ -97,6 +97,8 @@ const (
|
||||
maxPendingRevokes = 16
|
||||
|
||||
recommendedMaxRequestBytes = 10 * 1024 * 1024
|
||||
|
||||
readyPercent = 0.9
|
||||
)
|
||||
|
||||
var (
|
||||
@ -1637,7 +1639,7 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// check if we can promote this learner
|
||||
// check if we can promote this learner.
|
||||
if err := s.mayPromoteMember(types.ID(id)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -1665,10 +1667,65 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
|
||||
}
|
||||
|
||||
func (s *EtcdServer) mayPromoteMember(id types.ID) error {
|
||||
err := isLearnerReady(uint64(id))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !s.Cfg.StrictReconfigCheck {
|
||||
return nil
|
||||
}
|
||||
// TODO add more checks whether the member can be promoted.
|
||||
if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
|
||||
if lg := s.getLogger(); lg != nil {
|
||||
lg.Warn(
|
||||
"rejecting member promote request; not enough healthy members",
|
||||
zap.String("local-member-id", s.ID().String()),
|
||||
zap.String("requested-member-remove-id", id.String()),
|
||||
zap.Error(ErrNotEnoughStartedMembers),
|
||||
)
|
||||
} else {
|
||||
plog.Warningf("not enough started members, rejecting promote member %s", id)
|
||||
}
|
||||
return ErrNotEnoughStartedMembers
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// check whether the learner catches up with leader or not.
|
||||
// Note: it will return nil if member is not found in cluster or if member is not learner.
|
||||
// These two conditions will be checked before apply phase later.
|
||||
func isLearnerReady(id uint64) error {
|
||||
// sanity check, this can happen in the unit test when we do not start node.
|
||||
if raftStatus == nil {
|
||||
return nil
|
||||
}
|
||||
rs := raftStatus()
|
||||
|
||||
// leader's raftStatus.Progress is not nil
|
||||
if rs.Progress == nil {
|
||||
return ErrNotLeader
|
||||
}
|
||||
|
||||
var learnerMatch uint64
|
||||
isFound := false
|
||||
leaderID := rs.ID
|
||||
for memberID, progress := range rs.Progress {
|
||||
if id == memberID {
|
||||
// check its status
|
||||
learnerMatch = progress.Match
|
||||
isFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if isFound {
|
||||
leaderMatch := rs.Progress[leaderID].Match
|
||||
// the learner's Match not caught up with leader yet
|
||||
if float64(learnerMatch) < float64(leaderMatch)*readyPercent {
|
||||
return ErrLearnerNotReady
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user