From ab11415d25a2816070cc3dacd01278317f08eded Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Fri, 6 May 2016 15:06:41 -0700 Subject: [PATCH] *: add proposalsCommitted metrics --- Documentation/metrics.md | 3 +++ etcdserver/metrics.go | 7 +++++++ etcdserver/raft.go | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/Documentation/metrics.md b/Documentation/metrics.md index 3988146db..acc3ab656 100644 --- a/Documentation/metrics.md +++ b/Documentation/metrics.md @@ -24,6 +24,7 @@ All these metrics are prefixed with `etcd_server_` |---------------------------|----------------------------------------------------------|---------| | has_leader | Whether or not a leader exists. 1 is existence, 0 is not.| Gauge | | leader_changes_seen_total | The number of leader changes seen. | Counter | +| proposals_committed_total | The total number of consensus proposals committed. | Gauge | `has_leader` indicates whether the member has a leader. If a member does not have a leader, it is @@ -32,6 +33,8 @@ is totally unavailable. `leader_changes_seen_total` counts the number of leader changes the member has seen since its start. Rapid leadership changes impact the performance of etcd significantly. It also signals that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the etcd cluster. +`proposals_committed_total` records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. Healthy members of the same etcd cluster may report different totals of committed proposals at the same moment. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that the member is slow or unhealthy. + ### network These metrics describe the status of the network. 
diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index 70ea3d67c..042b9c133 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -58,6 +58,12 @@ var ( Name: "leader_changes_seen_total", Help: "The number of leader changes seen.", }) + proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "proposals_committed_total", + Help: "The total number of consensus proposals committed.", + }) ) func init() { @@ -66,6 +72,7 @@ func init() { prometheus.MustRegister(proposeFailed) prometheus.MustRegister(hasLeader) prometheus.MustRegister(leaderChanges) + prometheus.MustRegister(proposalsCommitted) } func monitorFileDescriptor(done <-chan struct{}) { diff --git a/etcdserver/raft.go b/etcdserver/raft.go index cc7fb924c..1a9590b21 100644 --- a/etcdserver/raft.go +++ b/etcdserver/raft.go @@ -226,6 +226,10 @@ func (r *raftNode) start(s *EtcdServer) { if err := r.storage.Save(rd.HardState, rd.Entries); err != nil { plog.Fatalf("raft save state and entries error: %v", err) } + if !raft.IsEmptyHardState(rd.HardState) { + proposalsCommitted.Set(float64(rd.HardState.Commit)) + } + r.raftStorage.Append(rd.Entries) if !islead {