From c1e4e647eb8d56720be0556e0bee24ed8cb87d71 Mon Sep 17 00:00:00 2001
From: Xiang Li
Date: Sat, 17 Oct 2015 12:57:18 -0700
Subject: [PATCH] snap: use Histogram for snap metrics

---
 Documentation/metrics.md |  8 ++++----
 snap/metrics.go          | 10 ++++++----
 snap/snapshotter.go      |  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/Documentation/metrics.md b/Documentation/metrics.md
index 8ba409a02..accc2e5fe 100644
--- a/Documentation/metrics.md
+++ b/Documentation/metrics.md
@@ -73,11 +73,11 @@ Example Prometheus queries that may be useful from these metrics (across all etc
 
 ### snapshot
 
-| Name                                       | Description                                                | Type    |
-|--------------------------------------------|------------------------------------------------------------|---------|
-| snapshot_save_total_durations_microseconds | The total latency distributions of save called by snapshot | Summary |
+| Name                                       | Description                                                | Type      |
+|--------------------------------------------|------------------------------------------------------------|-----------|
+| snapshot_save_total_durations_seconds      | The total latency distributions of save called by snapshot | Histogram |
 
-Abnormally high snapshot duration (`snapshot_save_total_durations_microseconds`) indicates disk issues and might cause the cluster to be unstable.
+Abnormally high snapshot duration (`snapshot_save_total_durations_seconds`) indicates disk issues and might cause the cluster to be unstable.
 
 ### rafthttp
 
diff --git a/snap/metrics.go b/snap/metrics.go
index 72758499a..918baffb0 100644
--- a/snap/metrics.go
+++ b/snap/metrics.go
@@ -18,18 +18,20 @@ import "github.com/coreos/etcd/Godeps/_workspace/src/github.com/prometheus/clien
 
 var (
 	// TODO: save_fsync latency?
-	saveDurations = prometheus.NewSummary(prometheus.SummaryOpts{
+	saveDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: "etcd",
 		Subsystem: "snapshot",
-		Name:      "save_total_durations_microseconds",
+		Name:      "save_total_durations_seconds",
 		Help:      "The total latency distributions of save called by snapshot.",
+		Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
 
-	marshallingDurations = prometheus.NewSummary(prometheus.SummaryOpts{
+	marshallingDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: "etcd",
 		Subsystem: "snapshot",
-		Name:      "save_marshalling_durations_microseconds",
+		Name:      "save_marshalling_durations_seconds",
 		Help:      "The marshalling cost distributions of save called by snapshot.",
+		Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
 )
 
diff --git a/snap/snapshotter.go b/snap/snapshotter.go
index 4f9eb9ed8..dd4fe02af 100644
--- a/snap/snapshotter.go
+++ b/snap/snapshotter.go
@@ -74,12 +74,12 @@ func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error {
 	if err != nil {
 		return err
 	} else {
-		marshallingDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond)))
+		marshallingDurations.Observe(float64(time.Since(start)) / float64(time.Second))
 	}
 
 	err = ioutil.WriteFile(path.Join(s.dir, fname), d, 0666)
 	if err == nil {
-		saveDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond)))
+		saveDurations.Observe(float64(time.Since(start)) / float64(time.Second))
 	}
 	return err
 }