etcdserver: use Histogram for proposal_durations

This commit is contained in:
Xiang Li 2015-10-17 12:48:25 -07:00
parent 1c7f52d931
commit d90a47656e
3 changed files with 11 additions and 10 deletions

View File

@ -16,15 +16,15 @@ etcd now exposes the following metrics:
### etcdserver ### etcdserver
| Name | Description | Type | | Name | Description | Type |
|-----------------------------------------|--------------------------------------------------|---------| |-----------------------------------------|--------------------------------------------------|-----------|
| file_descriptors_used_total | The total number of file descriptors used | Gauge | | file_descriptors_used_total | The total number of file descriptors used | Gauge |
| proposal_durations_milliseconds | The latency distributions of committing proposal | Summary | | proposal_durations_seconds | The latency distributions of committing proposal | Histogram |
| pending_proposal_total | The total number of pending proposals | Gauge | | pending_proposal_total | The total number of pending proposals | Gauge |
| proposal_failed_total | The total number of failed proposals | Counter | | proposal_failed_total | The total number of failed proposals | Counter |
High file descriptors (`file_descriptors_used_total`) usage (near the file descriptor limit of the process) indicates a potential out-of-file-descriptors issue. That might cause etcd to fail to create new WAL files and panic. High file descriptors (`file_descriptors_used_total`) usage (near the file descriptor limit of the process) indicates a potential out-of-file-descriptors issue. That might cause etcd to fail to create new WAL files and panic.
[Proposal](glossary.md#proposal) durations (`proposal_durations_milliseconds`) give you a summary of the proposal commit latency. Latency can be introduced into this process by network and disk IO. [Proposal](glossary.md#proposal) durations (`proposal_durations_seconds`) give you a histogram of the proposal commit latency. Latency can be introduced into this process by network and disk IO.
Pending proposal (`pending_proposal_total`) gives you an idea of how many proposals are in the queue and waiting for commit. An increasing pending number indicates a high client load or an unstable cluster. Pending proposal (`pending_proposal_total`) gives you an idea of how many proposals are in the queue and waiting for commit. An increasing pending number indicates a high client load or an unstable cluster.

View File

@ -23,11 +23,12 @@ import (
var ( var (
// TODO: with label in v3? // TODO: with label in v3?
proposeDurations = prometheus.NewSummary(prometheus.SummaryOpts{ proposeDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "etcd", Namespace: "etcd",
Subsystem: "server", Subsystem: "server",
Name: "proposal_durations_milliseconds", Name: "proposal_durations_seconds",
Help: "The latency distributions of committing proposal.", Help: "The latency distributions of committing proposal.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
}) })
proposePending = prometheus.NewGauge(prometheus.GaugeOpts{ proposePending = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd", Namespace: "etcd",

View File

@ -598,7 +598,7 @@ func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
select { select {
case x := <-ch: case x := <-ch:
proposeDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Millisecond))) proposeDurations.Observe(float64(time.Since(start)) / float64(time.Second))
resp := x.(Response) resp := x.(Response)
return resp, resp.err return resp, resp.err
case <-ctx.Done(): case <-ctx.Done():