mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: add mayPromote check
This commit is contained in:
parent
cca8b0d44f
commit
dfe296ac3c
@ -214,14 +214,13 @@ func TestMemberAddForLearner(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemberPromoteForLearner(t *testing.T) {
|
func TestMemberPromoteForNotReadyLearner(t *testing.T) {
|
||||||
// TODO test not ready learner promotion.
|
|
||||||
defer testutil.AfterTest(t)
|
defer testutil.AfterTest(t)
|
||||||
|
|
||||||
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
|
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
|
||||||
defer clus.Terminate(t)
|
defer clus.Terminate(t)
|
||||||
// TODO change the random client to client that talk to leader directly.
|
// first client is talked to leader because cluster size is 1
|
||||||
capi := clus.RandClient()
|
capi := clus.Client(0)
|
||||||
|
|
||||||
urls := []string{"http://127.0.0.1:1234"}
|
urls := []string{"http://127.0.0.1:1234"}
|
||||||
memberAddResp, err := capi.MemberAddAsLearner(context.Background(), urls)
|
memberAddResp, err := capi.MemberAddAsLearner(context.Background(), urls)
|
||||||
@ -244,19 +243,14 @@ func TestMemberPromoteForLearner(t *testing.T) {
|
|||||||
t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
|
t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
|
||||||
}
|
}
|
||||||
|
|
||||||
memberPromoteResp, err := capi.MemberPromote(context.Background(), learnerID)
|
// since we do not start learner, learner must be not ready.
|
||||||
if err != nil {
|
_, err = capi.MemberPromote(context.Background(), learnerID)
|
||||||
t.Fatalf("failed to promote member: %v", err)
|
expectedErrKeywords := "can only promote a learner member which catches up with leader"
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expecting promote not ready learner to fail, got no error")
|
||||||
}
|
}
|
||||||
|
if !strings.Contains(err.Error(), expectedErrKeywords) {
|
||||||
numberOfLearners = 0
|
t.Errorf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
|
||||||
for _, m := range memberPromoteResp.Members {
|
|
||||||
if m.IsLearner {
|
|
||||||
numberOfLearners++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if numberOfLearners != 0 {
|
|
||||||
t.Errorf("learner promoted, expect 0 learner, got %d", numberOfLearners)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -652,6 +652,36 @@ func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
|
||||||
|
nmembers := 1
|
||||||
|
nstarted := 0
|
||||||
|
|
||||||
|
for _, member := range c.VotingMembers() {
|
||||||
|
if member.IsStarted() {
|
||||||
|
nstarted++
|
||||||
|
}
|
||||||
|
nmembers++
|
||||||
|
}
|
||||||
|
|
||||||
|
nquorum := nmembers/2 + 1
|
||||||
|
if nstarted < nquorum {
|
||||||
|
if c.lg != nil {
|
||||||
|
c.lg.Warn(
|
||||||
|
"rejecting member promote; started member will be less than quorum",
|
||||||
|
zap.Int("number-of-started-member", nstarted),
|
||||||
|
zap.Int("quorum", nquorum),
|
||||||
|
zap.String("cluster-id", c.cid.String()),
|
||||||
|
zap.String("local-member-id", c.localID.String()),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
plog.Warningf("Reject promote member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
|
||||||
members := make(map[types.ID]*Member)
|
members := make(map[types.ID]*Member)
|
||||||
removed := make(map[types.ID]bool)
|
removed := make(map[types.ID]bool)
|
||||||
|
@ -39,6 +39,7 @@ var toGRPCErrorMap = map[error]error{
|
|||||||
membership.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
|
membership.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
|
||||||
membership.ErrTooManyLearners: rpctypes.ErrGRPCTooManyLearners,
|
membership.ErrTooManyLearners: rpctypes.ErrGRPCTooManyLearners,
|
||||||
etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
|
etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
|
||||||
|
etcdserver.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
|
||||||
|
|
||||||
mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
|
mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
|
||||||
mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
|
mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
|
||||||
|
@ -29,6 +29,7 @@ var (
|
|||||||
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
|
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
|
||||||
ErrLeaderChanged = errors.New("etcdserver: leader changed")
|
ErrLeaderChanged = errors.New("etcdserver: leader changed")
|
||||||
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
|
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
|
||||||
|
ErrLearnerNotReady = errors.New("etcdserver: can only promote a learner member which catches up with leader")
|
||||||
ErrNoLeader = errors.New("etcdserver: no leader")
|
ErrNoLeader = errors.New("etcdserver: no leader")
|
||||||
ErrNotLeader = errors.New("etcdserver: not leader")
|
ErrNotLeader = errors.New("etcdserver: not leader")
|
||||||
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
|
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
|
||||||
|
@ -97,6 +97,8 @@ const (
|
|||||||
maxPendingRevokes = 16
|
maxPendingRevokes = 16
|
||||||
|
|
||||||
recommendedMaxRequestBytes = 10 * 1024 * 1024
|
recommendedMaxRequestBytes = 10 * 1024 * 1024
|
||||||
|
|
||||||
|
readyPercent = 0.9
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -1637,7 +1639,7 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we can promote this learner
|
// check if we can promote this learner.
|
||||||
if err := s.mayPromoteMember(types.ID(id)); err != nil {
|
if err := s.mayPromoteMember(types.ID(id)); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -1665,10 +1667,65 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) mayPromoteMember(id types.ID) error {
|
func (s *EtcdServer) mayPromoteMember(id types.ID) error {
|
||||||
|
err := isLearnerReady(uint64(id))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if !s.Cfg.StrictReconfigCheck {
|
if !s.Cfg.StrictReconfigCheck {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// TODO add more checks whether the member can be promoted.
|
if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
|
||||||
|
if lg := s.getLogger(); lg != nil {
|
||||||
|
lg.Warn(
|
||||||
|
"rejecting member promote request; not enough healthy members",
|
||||||
|
zap.String("local-member-id", s.ID().String()),
|
||||||
|
zap.String("requested-member-remove-id", id.String()),
|
||||||
|
zap.Error(ErrNotEnoughStartedMembers),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
plog.Warningf("not enough started members, rejecting promote member %s", id)
|
||||||
|
}
|
||||||
|
return ErrNotEnoughStartedMembers
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// check whether the learner catches up with leader or not.
|
||||||
|
// Note: it will return nil if member is not found in cluster or if member is not learner.
|
||||||
|
// These two conditions will be checked before apply phase later.
|
||||||
|
func isLearnerReady(id uint64) error {
|
||||||
|
// sanity check, this can happen in the unit test when we do not start node.
|
||||||
|
if raftStatus == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rs := raftStatus()
|
||||||
|
|
||||||
|
// leader's raftStatus.Progress is not nil
|
||||||
|
if rs.Progress == nil {
|
||||||
|
return ErrNotLeader
|
||||||
|
}
|
||||||
|
|
||||||
|
var learnerMatch uint64
|
||||||
|
isFound := false
|
||||||
|
leaderID := rs.ID
|
||||||
|
for memberID, progress := range rs.Progress {
|
||||||
|
if id == memberID {
|
||||||
|
// check its status
|
||||||
|
learnerMatch = progress.Match
|
||||||
|
isFound = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if isFound {
|
||||||
|
leaderMatch := rs.Progress[leaderID].Match
|
||||||
|
// the learner's Match not caught up with leader yet
|
||||||
|
if float64(learnerMatch) < float64(leaderMatch)*readyPercent {
|
||||||
|
return ErrLearnerNotReady
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user