Merge pull request #10731 from WIZARD-CXY/learner_metric

etcdserver: add learner metrics
This commit is contained in:
Jingyi Hu 2019-06-08 22:43:03 -07:00 committed by GitHub
commit 48d144a3de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 0 deletions

View File

@ -533,6 +533,21 @@ etcd_server_is_leader
# type: "counter"
etcd_server_leader_changes_seen_total
# name: "etcd_server_is_learner"
# description: "Whether or not this member is a learner. 1 if is, 0 otherwise."
# type: "gauge"
etcd_server_is_learner
# name: "etcd_server_learner_promote_failures"
# description: "The total number of failed learner promotions (likely learner not ready) while this member is leader."
# type: "counter"
etcd_server_learner_promote_failures
# name: "etcd_server_learner_promote_successes"
# description: "The total number of successful learner promotions while this member is leader."
# type: "counter"
etcd_server_learner_promote_successes
# name: "etcd_server_proposals_applied_total"
# description: "The total number of consensus proposals applied."
# type: "gauge"

View File

@ -44,6 +44,26 @@ var (
Name: "leader_changes_seen_total",
Help: "The number of leader changes seen.",
})
// isLearner is the gauge exported as etcd_server_is_learner. Per its help
// text it reads 1 while this member is a learner and 0 otherwise.
isLearner = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "is_learner",
Help: "Whether or not this member is a learner. 1 if is, 0 otherwise.",
})
// learnerPromoteFailed is the counter vector exported as
// etcd_server_learner_promote_failures, partitioned by a single "Reason"
// label.
// NOTE(review): the PromoteMember call site passes err.Error() as the label
// value, so the number of time series grows with the number of distinct error
// strings — confirm that set is bounded.
// NOTE(review): Prometheus naming conventions suggest a "_total" suffix for
// counters and lowercase label names; renaming would change the exported
// metric, so it is left as is here.
learnerPromoteFailed = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "learner_promote_failures",
Help: "The total number of failed learner promotions (likely learner not ready) while this member is leader.",
},
// Label names, in the order WithLabelValues expects its values.
[]string{"Reason"},
)
// learnerPromoteSucceed is the counter exported as
// etcd_server_learner_promote_successes. PromoteMember increments it when the
// local promoteMember call returns without error.
// NOTE(review): Prometheus naming conventions suggest a "_total" suffix for
// counters; renaming would change the exported metric, so it is left as is.
learnerPromoteSucceed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "learner_promote_successes",
Help: "The total number of successful learner promotions while this member is leader.",
})
heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
@ -144,6 +164,9 @@ func init() {
prometheus.MustRegister(currentVersion)
prometheus.MustRegister(currentGoVersion)
prometheus.MustRegister(serverID)
prometheus.MustRegister(isLearner)
prometheus.MustRegister(learnerPromoteSucceed)
prometheus.MustRegister(learnerPromoteFailed)
currentVersion.With(prometheus.Labels{
"server_version": version.Version,

View File

@ -1649,7 +1649,12 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
// fails with ErrNotLeader, forward the request to leader node via HTTP. If promoteMember call fails with error
// other than ErrNotLeader, return the error.
resp, err := s.promoteMember(ctx, id)
if err == nil {
learnerPromoteSucceed.Inc()
return resp, nil
}
if err != ErrNotLeader {
learnerPromoteFailed.WithLabelValues(err.Error()).Inc()
return resp, err
}
@ -2262,6 +2267,15 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
}
}
// Update the isLearner metric only when the member named in this conf change is this server itself.
if confChangeContext.Member.ID == s.id {
if cc.Type == raftpb.ConfChangeAddLearnerNode {
isLearner.Set(1)
} else {
isLearner.Set(0)
}
}
case raftpb.ConfChangeRemoveNode:
id := types.ID(cc.NodeID)
s.cluster.RemoveMember(id)