Mirror of https://github.com/etcd-io/etcd.git
rafthttp/metrics.go: fixed TODO: record write/recv failures.
This commit is contained in:
parent 9a67d71e6c
commit 64e1a327ee
@@ -70,6 +70,8 @@ All these metrics are prefixed with `etcd_network_`
 |---------------------------|--------------------------------------------------------------------|---------------|
 | peer_sent_bytes_total | The total number of bytes sent to the peer with ID `To`. | Counter(To) |
 | peer_received_bytes_total | The total number of bytes received from the peer with ID `From`. | Counter(From) |
+| peer_sent_failures_total | The total number of send failures from the peer with ID `To`. | Counter(To) |
+| peer_received_failures_total | The total number of receive failures from the peer with ID `From`. | Counter(From) |
 | peer_round_trip_time_seconds | Round-Trip-Time histogram between peers. | Histogram(To) |
 | client_grpc_sent_bytes_total | The total number of bytes sent to grpc clients. | Counter |
 | client_grpc_received_bytes_total| The total number of bytes received to grpc clients. | Counter |
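
(Aside, not part of the diff: the exposed metric name is assembled as Namespace_Subsystem_Name, so the two new rows surface as etcd_network_peer_sent_failures_total{To="..."} and etcd_network_peer_received_failures_total{From="..."}. Below is a minimal, hypothetical sketch of the same pattern with the Prometheus Go client; the port and peer ID are made-up example values.)

package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Exposed as etcd_network_peer_sent_failures_total{To="<peer ID>"}.
var sentFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
	Namespace: "etcd",
	Subsystem: "network",
	Name:      "peer_sent_failures_total",
	Help:      "The total number of send failures from peers.",
}, []string{"To"})

func main() {
	prometheus.MustRegister(sentFailures)

	// Every failed send increments the child counter for that peer ID
	// ("8211f1d0f64f3269" is only an example value).
	sentFailures.WithLabelValues("8211f1d0f64f3269").Inc()

	// Expose the default registry so a Prometheus server can scrape it.
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":2381", nil))
}
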
@@ -104,6 +104,7 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	if err != nil {
 		plog.Errorf("failed to read raft message (%v)", err)
 		http.Error(w, "error reading raft message", http.StatusBadRequest)
+		recvFailures.WithLabelValues(r.RemoteAddr).Inc()
 		return
 	}

@@ -111,6 +112,7 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	if err := m.Unmarshal(b); err != nil {
 		plog.Errorf("failed to unmarshal raft message (%v)", err)
 		http.Error(w, "error unmarshaling raft message", http.StatusBadRequest)
+		recvFailures.WithLabelValues(r.RemoteAddr).Inc()
 		return
 	}

@@ -186,6 +188,7 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		msg := fmt.Sprintf("failed to decode raft message (%v)", err)
 		plog.Errorf(msg)
 		http.Error(w, msg, http.StatusBadRequest)
+		recvFailures.WithLabelValues(r.RemoteAddr).Inc()
 		return
 	}

@@ -16,7 +16,6 @@ package rafthttp

 import "github.com/prometheus/client_golang/prometheus"

-// TODO: record write/recv failures.
 var (
 	sentBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: "etcd",
@@ -36,6 +35,24 @@ var (
 		[]string{"From"},
 	)

+	sentFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "etcd",
+		Subsystem: "network",
+		Name:      "peer_sent_failures_total",
+		Help:      "The total number of send failures from peers.",
+	},
+		[]string{"To"},
+	)
+
+	recvFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "etcd",
+		Subsystem: "network",
+		Name:      "peer_received_failures_total",
+		Help:      "The total number of receive failures from peers.",
+	},
+		[]string{"From"},
+	)
+
 	rtts = prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: "etcd",
 		Subsystem: "network",
@@ -50,5 +67,7 @@ var (
 func init() {
 	prometheus.MustRegister(sentBytes)
 	prometheus.MustRegister(receivedBytes)
+	prometheus.MustRegister(sentFailures)
+	prometheus.MustRegister(recvFailures)
 	prometheus.MustRegister(rtts)
 }
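
(Aside, not part of the diff: WithLabelValues creates one child counter per distinct label value, so sentFailures and recvFailures keep a separate series per peer. A rough, hypothetical test sketch of that behavior using the client library's testutil helper; the package and test names are made up.)

package rafthttpsketch

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestPerPeerFailureCounts(t *testing.T) {
	recvFailures := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "etcd",
		Subsystem: "network",
		Name:      "peer_received_failures_total",
		Help:      "The total number of receive failures from peers.",
	}, []string{"From"})

	// Two failures attributed to peer "a", one to peer "b":
	// each label value accumulates its own series.
	recvFailures.WithLabelValues("a").Inc()
	recvFailures.WithLabelValues("a").Inc()
	recvFailures.WithLabelValues("b").Inc()

	if got := testutil.ToFloat64(recvFailures.WithLabelValues("a")); got != 2 {
		t.Fatalf("failures from peer a = %v, want 2", got)
	}
}
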
@@ -93,6 +93,7 @@ func (p *pipeline) handle() {
 			if isMsgSnap(m) {
 				p.raft.ReportSnapshot(m.To, raft.SnapshotFailure)
 			}
+			sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
 			continue
 		}

@@ -91,6 +91,7 @@ func (s *snapshotSender) send(merged snap.Message) {
 		// machine knows about it, it would pause a while and retry sending
 		// new snapshot message.
 		s.r.ReportSnapshot(m.To, raft.SnapshotFailure)
+		sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
 		return
 	}
 	s.status.activate()
@@ -158,6 +158,7 @@ func (cw *streamWriter) run() {

 			cw.status.deactivate(failureType{source: t.String(), action: "heartbeat"}, err.Error())

+			sentFailures.WithLabelValues(cw.peerID.String()).Inc()
 			cw.close()
 			plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
 			heartbeatc, msgc = nil, nil
@@ -184,6 +185,7 @@ func (cw *streamWriter) run() {
 			plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
 			heartbeatc, msgc = nil, nil
 			cw.r.ReportUnreachable(m.To)
+			sentFailures.WithLabelValues(cw.peerID.String()).Inc()

 		case conn := <-cw.connc:
 			cw.mu.Lock()
@@ -388,6 +390,7 @@ func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
 					plog.MergeWarningf("dropped internal raft message from %s since receiving buffer is full (overloaded network)", types.ID(m.From))
 				}
 				plog.Debugf("dropped %s from %s since receiving buffer is full", m.Type, types.ID(m.From))
+				recvFailures.WithLabelValues(types.ID(m.From).String()).Inc()
 			}
 		}
 	}
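
(Aside, not part of the diff: once a member is running, the new counters appear in the regular Prometheus exposition next to the existing etcd_network_ metrics. A rough sketch that prints just the failure counters, assuming a local member serving /metrics on its client URL at 127.0.0.1:2379.)

package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// Assumption: an etcd member is listening on 127.0.0.1:2379 and serving /metrics.
	resp, err := http.Get("http://127.0.0.1:2379/metrics")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Print only the per-peer failure counters added by this change.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "etcd_network_peer_sent_failures_total") ||
			strings.HasPrefix(line, "etcd_network_peer_received_failures_total") {
			fmt.Println(line)
		}
	}
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
}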