diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go
index cea328ca3..9a6d2ac9a 100644
--- a/mvcc/backend/backend.go
+++ b/mvcc/backend/backend.go
@@ -229,6 +229,8 @@ func (b *backend) Defrag() error {
 }
 
 func (b *backend) defrag() error {
+	now := time.Now()
+
 	// TODO: make this non-blocking?
 	// lock batchTx to ensure nobody is using previous tx, and then
 	// close previous ongoing tx.
@@ -285,6 +287,9 @@ func (b *backend) defrag() error {
 	atomic.StoreInt64(&b.size, size)
 	atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))
 
+	took := time.Since(now)
+	defragDurations.Observe(took.Seconds())
+
 	return nil
 }
 
diff --git a/mvcc/backend/metrics.go b/mvcc/backend/metrics.go
index 76fcb9cf2..b266d1e0f 100644
--- a/mvcc/backend/metrics.go
+++ b/mvcc/backend/metrics.go
@@ -27,8 +27,21 @@ var (
 		// highest bucket start of 0.001 sec * 2^13 == 8.192 sec
 		Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
+
+	defragDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "disk",
+		Name:      "backend_defrag_duration_seconds",
+		Help:      "The latency distribution of backend defragmentation.",
+
+		// 100 MB usually takes 1 sec, so start with 10 MB of 100 ms
+		// lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+		// highest bucket start of 0.1 sec * 2^12 == 409.6 sec
+		Buckets: prometheus.ExponentialBuckets(.1, 2, 13),
+	})
 )
 
 func init() {
 	prometheus.MustRegister(commitDurations)
+	prometheus.MustRegister(defragDurations)
 }