diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index a8e5a9da9..06e0ce9f8 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -65,6 +65,12 @@ var ( Name: "proposals_failed_total", Help: "The total number of failed proposals seen.", }) + slowReadIndex = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "slow_read_indexes_total", + Help: "The total number of pending read indexes not in sync with leader's or timed out read index requests.", + }) currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "etcd", Subsystem: "server", @@ -82,6 +88,7 @@ func init() { prometheus.MustRegister(proposalsApplied) prometheus.MustRegister(proposalsPending) prometheus.MustRegister(proposalsFailed) + prometheus.MustRegister(slowReadIndex) prometheus.MustRegister(currentVersion) currentVersion.With(prometheus.Labels{ diff --git a/etcdserver/v3_server.go b/etcdserver/v3_server.go index 0b068a852..993567324 100644 --- a/etcdserver/v3_server.go +++ b/etcdserver/v3_server.go @@ -770,12 +770,14 @@ func (s *EtcdServer) linearizableReadLoop() { id2 = binary.BigEndian.Uint64(rs.RequestCtx) } plog.Warningf("ignored out-of-date read index response; local node read indexes queueing up and waiting to be in sync with leader (request ID want %d, got %d)", id1, id2) + slowReadIndex.Inc() } case <-time.After(s.Cfg.ReqTimeout()): plog.Warningf("timed out waiting for read index response") nr.notify(ErrTimeout) timeout = true + slowReadIndex.Inc() case <-s.stopping: return