*: simplify network metrics

Xiang Li 2016-05-06 18:02:55 -07:00
parent 34fbec118a
commit 98dbdd5fbb
10 changed files with 82 additions and 85 deletions

View File

@@ -32,6 +32,22 @@ is totally unavailable.
`leader_changes_seen_total` counts the number of leader changes the member has seen since its start. Rapid leadership changes significantly impact etcd's performance. They also signal that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the etcd cluster.
### network
These metrics describe the status of the network.
All these metrics are prefixed with `etcd_network_`.
| Name | Description | Type |
|---------------------------|--------------------------------------------------------------------|---------------|
| sent_bytes_total | The total number of bytes sent to the member with ID `To`. | Counter(To) |
| received_bytes_total | The total number of bytes received from the member with ID `From`. | Counter(From) |
| round_trip_time_seconds | Round-Trip-Time histogram between members. | Histogram(To) |
`sent_bytes_total` counts the total number of bytes sent to a specific member. Usually the leader member sends more data than other members since it is responsible for transmitting replicated data.
`received_bytes_total` counts the total number of bytes received from a specific member. Usually follower members receive data only from the leader member.
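The `Counter(To)` and `Counter(From)` notation in the table means a Prometheus counter vector with a single label, `To` or `From`, carrying the remote member ID. A minimal sketch of that shape, assuming `prometheus/client_golang` (the variable name, helper call, and member ID below are illustrative, not etcd code):

```go
package main

import "github.com/prometheus/client_golang/prometheus"

// A counter vector in the shape of etcd_network_sent_bytes_total:
// one time series per destination member, keyed by the "To" label.
var sentBytesExample = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "etcd",
		Subsystem: "network",
		Name:      "sent_bytes_total",
		Help:      "The total number of bytes sent.",
	},
	[]string{"To"},
)

func main() {
	prometheus.MustRegister(sentBytesExample)
	// Credit 512 bytes to a peer; "6e3bd23ae5f1eae0" is a placeholder member ID.
	sentBytesExample.WithLabelValues("6e3bd23ae5f1eae0").Add(512)
}
```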
### gRPC requests
These metrics describe the requests served by a specific etcd member: total received requests, total failed requests, and processing latency. They are useful for tracking user-generated traffic hitting the etcd cluster.
@@ -94,24 +110,6 @@ Abnormally high fsync duration (`fsync_duration_seconds`) indicates disk issues
Abnormally high snapshot duration (`snapshot_save_total_duration_seconds`) indicates disk issues and might cause the cluster to be unstable.
### rafthttp
| Name | Description | Type | Labels |
|-----------------------------------|--------------------------------------------|--------------|--------------------------------|
| message_sent_latency_seconds | The latency distributions of messages sent | HistogramVec | sendingType, msgType, remoteID |
| message_sent_failed_total | The total number of failed messages sent | Summary | sendingType, msgType, remoteID |
Abnormally high message duration (`message_sent_latency_seconds`) indicates network issues and might cause the cluster to be unstable.
An increase in message failures (`message_sent_failed_total`) indicates more severe network issues and might cause the cluster to be unstable.
Label `sendingType` is the connection type used to send messages. `message`, `msgapp` and `msgappv2` use HTTP streaming, while `pipeline` issues an HTTP request for each message.
Label `msgType` is the type of raft message. `MsgApp` carries log replication entries; `MsgSnap` installs snapshots; `MsgProp` forwards proposals; the other types maintain internal raft state. For large snapshots, a lengthy `MsgSnap` transmission latency should be expected. For other types of messages, given enough network bandwidth, latencies comparable to ping latency should be expected.
Label `remoteID` is the member ID of the message destination.
## Prometheus supplied metrics
The Prometheus client library provides a number of metrics under the `go` and `process` namespaces. There are a few that are particularly interesting.
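These `go_*` and `process_*` series come from the client library's default registry, so any process exposing that registry exports them automatically. A minimal sketch, assuming the `promhttp` package from `prometheus/client_golang` (the listen address is arbitrary, not etcd's actual wiring):

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// The default registry already contains the Go and process collectors,
	// so go_* and process_* metrics are served without any extra setup.
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":2381", nil))
}
```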

View File

@@ -39,8 +39,9 @@ func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapi uint64,
plog.Panicf("store save should never fail: %v", err)
}
dbsnap := s.be.Snapshot()
// get a snapshot of v3 KV as readCloser
rc := newSnapshotReaderCloser(s.be.Snapshot())
rc := newSnapshotReaderCloser(dbsnap)
// put the []byte snapshot of store into raft snapshot and return the merged snapshot with
// KV readCloser snapshot.
@@ -54,7 +55,7 @@ func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapi uint64,
}
m.Snapshot = snapshot
return *snap.NewMessage(m, rc)
return *snap.NewMessage(m, rc, dbsnap.Size())
}
func newSnapshotReaderCloser(snapshot backend.Snapshot) io.ReadCloser {

View File

@@ -106,12 +106,16 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
http.Error(w, "error reading raft message", http.StatusBadRequest)
return
}
var m raftpb.Message
if err := m.Unmarshal(b); err != nil {
plog.Errorf("failed to unmarshal raft message (%v)", err)
http.Error(w, "error unmarshaling raft message", http.StatusBadRequest)
return
}
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(len(b)))
if err := h.r.Process(context.TODO(), m); err != nil {
switch v := err.(type) {
case writerToResponse:
@@ -181,6 +185,9 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
http.Error(w, msg, http.StatusBadRequest)
return
}
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size()))
if m.Type != raftpb.MsgSnap {
plog.Errorf("unexpected raft message type %s on snapshot path", m.Type)
http.Error(w, "wrong raft message type", http.StatusBadRequest)
@@ -189,13 +196,15 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
plog.Infof("receiving database snapshot [index:%d, from %s] ...", m.Snapshot.Metadata.Index, types.ID(m.From))
// save incoming database snapshot.
if err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index); err != nil {
if n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index); err != nil {
msg := fmt.Sprintf("failed to save KV snapshot (%v)", err)
plog.Error(msg)
http.Error(w, msg, http.StatusInternalServerError)
return
} else {
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(n))
plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From))
}
plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From))
if err := h.r.Process(context.TODO(), m); err != nil {
switch v := err.(type) {

View File

@@ -14,57 +14,41 @@
package rafthttp
import (
"time"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/prometheus/client_golang/prometheus"
)
import "github.com/prometheus/client_golang/prometheus"
// TODO: record write/recv failures.
var (
// TODO: create a separate histogram for recording
// snapshot sending metric. snapshot can be large and
// take a long time to send. So it needs a different
// time range than other type of messages.
msgSentDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "rafthttp",
Name: "message_sent_latency_seconds",
Help: "message sent latency distributions.",
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
},
[]string{"sendingType", "remoteID", "msgType"},
sentBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "sent_bytes_total",
Help: "The total number of bytes sent.",
},
[]string{"To"},
)
msgSentFailed = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "rafthttp",
Name: "message_sent_failed_total",
Help: "The total number of failed messages sent.",
receivedBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "received_bytes_total",
Help: "The total number of bytes received.",
},
[]string{"sendingType", "remoteID", "msgType"},
[]string{"From"},
)
rtts = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "round_trip_time_seconds",
Help: "Round-Trip-Time histogram between members.",
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 14),
},
[]string{"To"},
)
)
func init() {
prometheus.MustRegister(msgSentDuration)
prometheus.MustRegister(msgSentFailed)
}
func reportSentDuration(sendingType string, m raftpb.Message, duration time.Duration) {
typ := m.Type.String()
if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat"
}
msgSentDuration.WithLabelValues(sendingType, types.ID(m.To).String(), typ).Observe(float64(duration) / float64(time.Second))
}
func reportSentFailure(sendingType string, m raftpb.Message) {
typ := m.Type.String()
if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat"
}
msgSentFailed.WithLabelValues(sendingType, types.ID(m.To).String(), typ).Inc()
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(receivedBytes)
prometheus.MustRegister(rtts)
}
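The remaining files in this commit feed these three collectors from the transport code and the peer prober. A compact sketch of the call pattern (`recordPeerTraffic` and `peerID` are illustrative, not part of the commit, and assume they sit in package `rafthttp` next to the variables above):

```go
package rafthttp

import "time"

// recordPeerTraffic shows how the new collectors are used: byte counts keyed
// by the remote member ID, and probe round-trip times observed in seconds.
func recordPeerTraffic(peerID string, sent, received int, rtt time.Duration) {
	sentBytes.WithLabelValues(peerID).Add(float64(sent))
	receivedBytes.WithLabelValues(peerID).Add(float64(received))
	rtts.WithLabelValues(peerID).Observe(rtt.Seconds())
}
```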

View File

@@ -96,7 +96,6 @@ func (p *pipeline) handle() {
if err != nil {
p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, err.Error())
reportSentFailure(pipelineMsg, m)
if m.Type == raftpb.MsgApp && p.fs != nil {
p.fs.Fail()
}
@@ -114,7 +113,7 @@ func (p *pipeline) handle() {
if isMsgSnap(m) {
p.r.ReportSnapshot(m.To, raft.SnapshotFinish)
}
reportSentDuration(pipelineMsg, m, time.Since(start))
sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(m.Size()))
case <-p.stopc:
return
}

View File

@@ -53,6 +53,7 @@ func monitorProbingStatus(s probing.Status, id string) {
if s.ClockDiff() > time.Second {
plog.Warningf("the clock difference against peer %s is too high [%v > %v]", id, s.ClockDiff(), time.Second)
}
rtts.WithLabelValues(id).Observe(s.SRTT().Seconds())
case <-s.StopNotify():
return
}

View File

@@ -65,8 +65,6 @@ func (s *snapshotSender) stop() { close(s.stopc) }
func (s *snapshotSender) send(merged snap.Message) {
m := merged.Message
start := time.Now()
body := createSnapBody(merged)
defer body.Close()
@@ -87,7 +85,6 @@ func (s *snapshotSender) send(merged snap.Message) {
}
s.picker.unreachable(u)
reportSentFailure(sendSnap, m)
s.status.deactivate(failureType{source: sendSnap, action: "post"}, err.Error())
s.r.ReportUnreachable(m.To)
// report SnapshotFailure to raft state machine. After raft state
@@ -96,10 +93,11 @@ func (s *snapshotSender) send(merged snap.Message) {
s.r.ReportSnapshot(m.To, raft.SnapshotFailure)
return
}
reportSentDuration(sendSnap, m, time.Since(start))
s.status.activate()
s.r.ReportSnapshot(m.To, raft.SnapshotFinish)
plog.Infof("database snapshot [index: %d, to: %s] sent out successfully", m.Snapshot.Metadata.Index, types.ID(m.To))
sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(merged.TotalSize))
}
// post posts the given request.

View File

@@ -139,40 +139,42 @@ func (cw *streamWriter) run() {
batched int
)
tickc := time.Tick(ConnReadTimeout / 3)
unflushed := 0
for {
select {
case <-heartbeatc:
start := time.Now()
err := enc.encode(linkHeartbeatMessage)
unflushed += linkHeartbeatMessage.Size()
if err == nil {
flusher.Flush()
batched = 0
reportSentDuration(string(t), linkHeartbeatMessage, time.Since(start))
sentBytes.WithLabelValues(cw.id.String()).Add(float64(unflushed))
unflushed = 0
continue
}
reportSentFailure(string(t), linkHeartbeatMessage)
cw.status.deactivate(failureType{source: t.String(), action: "heartbeat"}, err.Error())
cw.close()
heartbeatc, msgc = nil, nil
case m := <-msgc:
start := time.Now()
err := enc.encode(m)
if err == nil {
unflushed += m.Size()
if len(msgc) == 0 || batched > streamBufSize/2 {
flusher.Flush()
sentBytes.WithLabelValues(cw.id.String()).Add(float64(unflushed))
unflushed = 0
batched = 0
} else {
batched++
}
reportSentDuration(string(t), m, time.Since(start))
continue
}
reportSentFailure(string(t), m)
cw.status.deactivate(failureType{source: t.String(), action: "write"}, err.Error())
cw.close()
heartbeatc, msgc = nil, nil
@@ -190,6 +192,7 @@ func (cw *streamWriter) run() {
plog.Panicf("unhandled stream type %s", conn.t)
}
flusher = conn.Flusher
unflushed = 0
cw.mu.Lock()
cw.status.activate()
cw.closer = conn.Closer
@@ -332,6 +335,8 @@ func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
return err
}
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size()))
cr.mu.Lock()
paused := cr.paused
cr.mu.Unlock()

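The `unflushed` counter in the streamWriter hunks above is the subtle part of this file: message sizes are tallied as they are encoded into the buffered writer, but `sent_bytes_total` is only incremented once the buffer is actually flushed to the peer, and the tally resets when the connection is replaced. A self-contained sketch of that accounting, using a plain `bufio.Writer` in place of the stream encoder:

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
)

func main() {
	var conn bytes.Buffer // stands in for the peer connection
	w := bufio.NewWriter(&conn)

	unflushed := 0
	for _, msg := range []string{"MsgHeartbeat", "MsgApp", "MsgApp"} {
		if _, err := w.WriteString(msg); err != nil {
			break // nothing reached the peer, so nothing is counted
		}
		unflushed += len(msg)
	}
	if err := w.Flush(); err == nil {
		// Only bytes actually written out to the connection are credited;
		// in the real code the tally is then reset to zero.
		fmt.Printf("add %d bytes to sent_bytes_total for this peer\n", unflushed)
	}
}
```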
View File

@@ -26,10 +26,10 @@ import (
// SaveDBFrom saves snapshot of the database from the given reader. It
// guarantees the save operation is atomic.
func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) error {
func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) {
f, err := ioutil.TempFile(s.dir, "tmp")
if err != nil {
return err
return 0, err
}
var n int64
n, err = io.Copy(f, r)
@@ -39,22 +39,22 @@ func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) error {
f.Close()
if err != nil {
os.Remove(f.Name())
return err
return n, err
}
fn := path.Join(s.dir, fmt.Sprintf("%016x.snap.db", id))
if fileutil.Exist(fn) {
os.Remove(f.Name())
return nil
return n, nil
}
err = os.Rename(f.Name(), fn)
if err != nil {
os.Remove(f.Name())
return err
return n, err
}
plog.Infof("saved database snapshot to disk [total bytes: %d]", n)
return nil
return n, nil
}
// DBFilePath returns the file path for the snapshot of the database with

View File

@@ -31,13 +31,15 @@ import (
type Message struct {
raftpb.Message
ReadCloser io.ReadCloser
TotalSize int64
closeC chan bool
}
func NewMessage(rs raftpb.Message, rc io.ReadCloser) *Message {
func NewMessage(rs raftpb.Message, rc io.ReadCloser, rcSize int64) *Message {
return &Message{
Message: rs,
ReadCloser: rc,
TotalSize: int64(rs.Size()) + rcSize,
closeC: make(chan bool, 1),
}
}