diff --git a/Documentation/op-guide/grafana.json b/Documentation/op-guide/grafana.json index 45bfc6513..c405fdfee 100644 --- a/Documentation/op-guide/grafana.json +++ b/Documentation/op-guide/grafana.json @@ -341,7 +341,7 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", + "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", "hide": false, "interval": "", "intervalFactor": 2, diff --git a/Documentation/op-guide/maintenance.md b/Documentation/op-guide/maintenance.md index 294d8c7d6..7e85a11cf 100644 --- a/Documentation/op-guide/maintenance.md +++ b/Documentation/op-guide/maintenance.md @@ -149,7 +149,9 @@ $ ETCDCTL_API=3 etcdctl put newkey 123 OK ``` -The metric `etcd_debugging_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. +The metric `etcd_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is renamed to `etcd_mvcc_db_total_size_in_bytes` from v3.4. 
## Snapshot backup diff --git a/Documentation/upgrades/upgrade_3_4.md b/Documentation/upgrades/upgrade_3_4.md index fc5b3fb29..130f87ff0 100644 --- a/Documentation/upgrades/upgrade_3_4.md +++ b/Documentation/upgrades/upgrade_3_4.md @@ -47,6 +47,19 @@ OK +etcd --peer-trusted-ca-file ca-peer.crt ``` +#### Promote `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promotes `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is still served in v3.4 for backward compatibility. It will be completely deprecated in v3.5. + +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve monitoring guide, we will promote more metrics. + #### Deprecating `etcd --log-output` flag (now `--log-outputs`) Rename [`etcd --log-output` to `--log-outputs`](https://github.com/coreos/etcd/pull/9624) to support multiple log outputs. **`etcd --logger=capnslog` does not support multiple log outputs.** diff --git a/Documentation/upgrades/upgrade_3_5.md b/Documentation/upgrades/upgrade_3_5.md index 9f0d1eeeb..6d9a70a15 100644 --- a/Documentation/upgrades/upgrade_3_5.md +++ b/Documentation/upgrades/upgrade_3_5.md @@ -14,6 +14,17 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this Highlighted breaking changes in 3.5. +#### Deprecate `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promoted `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. And v3.5 completely deprecates `etcd_debugging_mvcc_db_total_size_in_bytes`. 
+ +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve monitoring guide, we will promote more metrics. + #### Deprecated in `etcd --logger capnslog` v3.4 defaults to `--logger=zap` in order to support multiple log outputs and structured logging. diff --git a/integration/metrics_test.go b/integration/metrics_test.go index 3dccd220a..ca50ef67e 100644 --- a/integration/metrics_test.go +++ b/integration/metrics_test.go @@ -40,8 +40,16 @@ func TestMetricDbSizeBoot(t *testing.T) { } } -// TestMetricDbSizeDefrag checks that the db size metric is set after defrag. func TestMetricDbSizeDefrag(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd") +} + +func TestMetricDbSizeDefragDebugging(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd_debugging") +} + +// testMetricDbSizeDefrag checks that the db size metric is set after defrag. +func testMetricDbSizeDefrag(t *testing.T, name string) { defer testutil.AfterTest(t) clus := NewClusterV3(t, &ClusterConfig{Size: 1}) defer clus.Terminate(t) @@ -63,7 +71,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { time.Sleep(500 * time.Millisecond) expected := numPuts * len(putreq.Value) - beforeDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + beforeDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -74,7 +82,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { if bv < expected { t.Fatalf("expected db size greater than %d, got %d", expected, bv) } - beforeDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + beforeDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -98,7 +106,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { } time.Sleep(500 * time.Millisecond) - afterCompactionInUse, err := 
clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterCompactionInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -113,7 +121,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { // defrag should give freed space back to fs mc.Defragment(context.TODO(), &pb.DefragmentRequest{}) - afterDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + afterDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -125,7 +133,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { t.Fatalf("expected less than %d, got %d after defrag", bv, av) } - afterDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } diff --git a/mvcc/kvstore.go b/mvcc/kvstore.go index a445f6a4a..9c7f5c3ad 100644 --- a/mvcc/kvstore.go +++ b/mvcc/kvstore.go @@ -323,6 +323,9 @@ func (s *store) restore() error { reportDbTotalSizeInBytesMu.Lock() reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) } reportDbTotalSizeInBytesMu.Unlock() + reportDbTotalSizeInBytesDebuggingMu.Lock() + reportDbTotalSizeInBytesDebugging = func() float64 { return float64(b.Size()) } + reportDbTotalSizeInBytesDebuggingMu.Unlock() reportDbTotalSizeInUseInBytesMu.Lock() reportDbTotalSizeInUseInBytes = func() float64 { return float64(b.SizeInUse()) } reportDbTotalSizeInUseInBytesMu.Unlock() diff --git a/mvcc/metrics.go b/mvcc/metrics.go index f80c70c85..9163cc7c6 100644 --- a/mvcc/metrics.go +++ b/mvcc/metrics.go @@ -146,7 +146,7 @@ var ( }) dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Namespace: "etcd_debugging", + Namespace: "etcd", Subsystem: "mvcc", Name: "db_total_size_in_bytes", Help: "Total size of the underlying database physically allocated in bytes.", @@ -159,10 +159,27 
@@ var ( ) // overridden by mvcc initialization reportDbTotalSizeInBytesMu sync.RWMutex - reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 } + reportDbTotalSizeInBytes = func() float64 { return 0 } + + // TODO: remove this in v3.5 + dbTotalSizeDebugging = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "etcd_debugging", + Subsystem: "mvcc", + Name: "db_total_size_in_bytes", + Help: "Total size of the underlying database physically allocated in bytes.", + }, + func() float64 { + reportDbTotalSizeInBytesDebuggingMu.RLock() + defer reportDbTotalSizeInBytesDebuggingMu.RUnlock() + return reportDbTotalSizeInBytesDebugging() + }, + ) + // overridden by mvcc initialization + reportDbTotalSizeInBytesDebuggingMu sync.RWMutex + reportDbTotalSizeInBytesDebugging = func() float64 { return 0 } dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Namespace: "etcd_debugging", + Namespace: "etcd", Subsystem: "mvcc", Name: "db_total_size_in_use_in_bytes", Help: "Total size of the underlying database logically in use in bytes.", @@ -218,6 +235,7 @@ func init() { prometheus.MustRegister(dbCompactionTotalMs) prometheus.MustRegister(dbCompactionKeysCounter) prometheus.MustRegister(dbTotalSize) + prometheus.MustRegister(dbTotalSizeDebugging) prometheus.MustRegister(dbTotalSizeInUse) prometheus.MustRegister(hashSec) prometheus.MustRegister(hashRevSec)