mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #6851 from gyuho/metrics
v3rpc: replace grpc metrics w/ go-grpc-prometheus
This commit is contained in:
commit
677606da7d
@ -82,31 +82,7 @@ All these metrics are prefixed with `etcd_network_`
|
||||
|
||||
### gRPC requests
|
||||
|
||||
These metrics describe the requests served by a specific etcd member: total received requests, total failed requests, and processing latency. They are useful for tracking user-generated traffic hitting the etcd cluster.
|
||||
|
||||
All these metrics are prefixed with `etcd_grpc_`
|
||||
|
||||
| Name | Description | Type |
|
||||
|--------------------------------|-------------------------------------------------------------------------------------|------------------------|
|
||||
| requests_total | Total number of received requests | Counter(method) |
|
||||
| requests_failed_total | Total number of failed requests. | Counter(method,error) |
|
||||
| active_streams | Total number of active streams. | Gauge(method) |
|
||||
| unary_requests_duration_seconds | Bucketed handling duration of the requests. | Histogram(method) |
|
||||
|
||||
|
||||
Example Prometheus queries that may be useful from these metrics (across all etcd members):
|
||||
|
||||
* `sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[1m]) by (grpc_method) / sum(rate(etcd_grpc_total{job="etcd"})[1m]) by (grpc_method)`
|
||||
|
||||
Shows the fraction of events that failed by gRPC method across all members, across a time window of `1m`.
|
||||
|
||||
* `sum(rate(etcd_grpc_requests_total{job="etcd",grpc_method="PUT"})[1m]) by (grpc_method)`
|
||||
|
||||
Shows the rate of PUT requests across all members, across a time window of `1m`.
|
||||
|
||||
* `histogram_quantile(0.9, sum(rate(etcd_grpc_unary_requests_duration_seconds{job="etcd",grpc_method="PUT"}[5m]) ) by (le))`
|
||||
|
||||
Show the 0.90-tile latency (in seconds) of PUT request handling across all members, with a window of `5m`.
|
||||
These metrics are exposed via [go-grpc-prometheus][go-grpc-prometheus].
|
||||
|
||||
## etcd_debugging namespace metrics
|
||||
|
||||
@ -137,3 +113,4 @@ Heavy file descriptor (`process_open_fds`) usage (i.e., near the process's file
|
||||
[prometheus-getting-started]: http://prometheus.io/docs/introduction/getting_started/
|
||||
[prometheus-naming]: http://prometheus.io/docs/practices/naming/
|
||||
[v2-http-metrics]: v2/metrics.md#http-requests
|
||||
[go-grpc-prometheus]: https://github.com/grpc-ecosystem/go-grpc-prometheus
|
@ -115,20 +115,20 @@
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [{
|
||||
"expr": "sum(rate(etcd_grpc_requests_total [1m]))",
|
||||
"expr": "sum(rate({grpc_type=\"unary\",grpc_code!=\"OK\"} [1m]))",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} RPC Rate",
|
||||
"metric": "etcd_grpc_requests_total",
|
||||
"metric": "grpc_server_started_total",
|
||||
"refId": "A",
|
||||
"step": 4
|
||||
"step": 2
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(etcd_grpc_requests_failed_total [1m]))",
|
||||
"expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\",grpc_code!=\"OK\"} [1m])) - sum(rate(grpc_server_handled_total{grpc_type=\"unary\"} [1m]))",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} RPC Failed Rate",
|
||||
"metric": "etcd_grpc_requests_failed_total",
|
||||
"metric": "grpc_server_handled_total",
|
||||
"refId": "B",
|
||||
"step": 4
|
||||
"step": 2
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
@ -197,18 +197,18 @@
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [{
|
||||
"expr": "sum(etcd_grpc_active_streams {grpc_service=\"etcdserverpb.Watch\"})",
|
||||
"expr": "sum(grpc_server_started_total {grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\",grpc_code!=\"OK\"}) - sum(grpc_server_handled_total {grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Watch Streams",
|
||||
"metric": "etcd_grpc_active_streams",
|
||||
"metric": "grpc_server_handled_total",
|
||||
"refId": "A",
|
||||
"step": 4
|
||||
},
|
||||
{
|
||||
"expr": "sum(etcd_grpc_active_streams {grpc_service=\"etcdserverpb.Lease\"})",
|
||||
"expr": "sum(grpc_server_started_total {grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total {grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Lease Streams",
|
||||
"metric": "etcd_grpc_active_streams",
|
||||
"metric": "grpc_server_handled_total",
|
||||
"refId": "B",
|
||||
"step": 4
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
"github.com/coreos/etcd/pkg/types"
|
||||
"github.com/coreos/etcd/raft"
|
||||
|
||||
prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
|
||||
"golang.org/x/net/context"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/metadata"
|
||||
@ -53,7 +54,8 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
|
||||
}
|
||||
}
|
||||
}
|
||||
return metricsUnaryInterceptor(ctx, req, info, handler)
|
||||
|
||||
return prometheus.UnaryServerInterceptor(ctx, req, info, handler)
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,38 +90,11 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
|
||||
|
||||
}
|
||||
}
|
||||
return metricsStreamInterceptor(srv, ss, info, handler)
|
||||
|
||||
return prometheus.StreamServerInterceptor(srv, ss, info, handler)
|
||||
}
|
||||
}
|
||||
|
||||
func metricsUnaryInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
|
||||
service, method := splitMethodName(info.FullMethod)
|
||||
receivedCounter.WithLabelValues(service, method).Inc()
|
||||
|
||||
start := time.Now()
|
||||
resp, err = handler(ctx, req)
|
||||
if err != nil {
|
||||
failedCounter.WithLabelValues(service, method, grpc.Code(err).String()).Inc()
|
||||
}
|
||||
handlingDuration.WithLabelValues(service, method).Observe(time.Since(start).Seconds())
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
func metricsStreamInterceptor(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
|
||||
service, method := splitMethodName(info.FullMethod)
|
||||
receivedCounter.WithLabelValues(service, method).Inc()
|
||||
|
||||
streamsGauage.WithLabelValues(service, method).Inc()
|
||||
err := handler(srv, ss)
|
||||
streamsGauage.WithLabelValues(service, method).Dec()
|
||||
if err != nil {
|
||||
failedCounter.WithLabelValues(service, method, grpc.Code(err).String()).Inc()
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func splitMethodName(fullMethodName string) (string, string) {
|
||||
fullMethodName = strings.TrimPrefix(fullMethodName, "/") // remove leading slash
|
||||
if i := strings.Index(fullMethodName, "/"); i >= 0 {
|
||||
|
@ -17,39 +17,6 @@ package v3rpc
|
||||
import "github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
var (
|
||||
receivedCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "grpc",
|
||||
Name: "requests_total",
|
||||
Help: "Counter of received requests.",
|
||||
}, []string{"grpc_service", "grpc_method"})
|
||||
|
||||
failedCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "grpc",
|
||||
Name: "requests_failed_total",
|
||||
Help: "Counter of failed requests.",
|
||||
}, []string{"grpc_service", "grpc_method", "grpc_code"})
|
||||
|
||||
streamsGauage = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "grpc",
|
||||
Name: "active_streams",
|
||||
Help: "Number of active streams.",
|
||||
}, []string{"grpc_service", "grpc_method"})
|
||||
|
||||
handlingDuration = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "grpc",
|
||||
Name: "unary_requests_duration_seconds",
|
||||
Help: "Bucketed histogram of processing time (s) of handled unary (non-stream) requests.",
|
||||
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
|
||||
}, []string{"grpc_service", "grpc_method"})
|
||||
|
||||
sentBytes = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "network",
|
||||
@ -66,11 +33,6 @@ var (
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(receivedCounter)
|
||||
prometheus.MustRegister(failedCounter)
|
||||
prometheus.MustRegister(streamsGauage)
|
||||
prometheus.MustRegister(handlingDuration)
|
||||
|
||||
prometheus.MustRegister(sentBytes)
|
||||
prometheus.MustRegister(receivedBytes)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user