etcdserver/api: add "etcd_network_snapshot_send_inflights_total", "etcd_network_snapshot_receive_inflights_total"

Useful for deciding when to terminate an unhealthy follower.
If the follower is currently receiving a snapshot from the leader, the operator may prefer to wait.

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
This commit is contained in:
Gyuho Lee 2019-08-08 12:39:27 -07:00
parent 629cb7aa5e
commit abdb7ca17b
3 changed files with 30 additions and 1 deletions

View File

@ -258,6 +258,11 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}
snapshotReceiveInflights.WithLabelValues(from).Inc()
defer func() {
snapshotReceiveInflights.WithLabelValues(from).Dec()
}()
if h.lg != nil {
h.lg.Info(
"receiving database snapshot",

View File

@ -80,6 +80,15 @@ var (
[]string{"To"},
)
// snapshotSendInflights is a gauge of snapshot sends currently in progress,
// partitioned by the "To" label. snapshotSender.send increments it right
// before posting the snapshot and decrements it in a deferred call, so the
// value drops again whether the send succeeds or fails.
// NOTE(review): the "_total" suffix is conventionally reserved for counters
// in Prometheus metric naming; this is a gauge. The name is left as is
// because renaming would break existing consumers of the metric.
snapshotSendInflights = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "snapshot_send_inflights_total",
Help: "Total number of inflight snapshot sends",
},
[]string{"To"},
)
snapshotSendFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
@ -111,6 +120,15 @@ var (
[]string{"From"},
)
// snapshotReceiveInflights is a gauge of snapshot receives currently in
// progress, partitioned by the "From" label. snapshotHandler.ServeHTTP
// increments it before it starts receiving the database snapshot and
// decrements it in a deferred call when the handler returns.
// NOTE(review): the "_total" suffix is conventionally reserved for counters
// in Prometheus metric naming; this is a gauge. The name is left as is
// because renaming would break existing consumers of the metric.
snapshotReceiveInflights = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "snapshot_receive_inflights_total",
Help: "Total number of inflight snapshot receives",
},
[]string{"From"},
)
snapshotReceiveFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
@ -156,9 +174,11 @@ func init() {
prometheus.MustRegister(recvFailures)
prometheus.MustRegister(snapshotSend)
prometheus.MustRegister(snapshotSendInflights)
prometheus.MustRegister(snapshotSendFailures)
prometheus.MustRegister(snapshotSendSeconds)
prometheus.MustRegister(snapshotReceive)
prometheus.MustRegister(snapshotReceiveInflights)
prometheus.MustRegister(snapshotReceiveFailures)
prometheus.MustRegister(snapshotReceiveSeconds)

View File

@ -90,6 +90,11 @@ func (s *snapshotSender) send(merged snap.Message) {
plog.Infof("start to send database snapshot [index: %d, to %s]...", m.Snapshot.Metadata.Index, types.ID(m.To))
}
snapshotSendInflights.WithLabelValues(to).Inc()
defer func() {
snapshotSendInflights.WithLabelValues(to).Dec()
}()
err := s.post(req)
defer merged.CloseWithError(err)
if err != nil {
@ -139,7 +144,6 @@ func (s *snapshotSender) send(merged snap.Message) {
}
sentBytes.WithLabelValues(to).Add(float64(merged.TotalSize))
snapshotSend.WithLabelValues(to).Inc()
snapshotSendSeconds.WithLabelValues(to).Observe(time.Since(start).Seconds())
}