Merge pull request #17983 from thedtripp/feature/addServerRangeDurationMetrics

etcdserver: add server range duration metrics
This commit is contained in:
James Blair 2024-05-19 10:04:29 +12:00 committed by GitHub
commit 52fb28c1a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 118 additions and 0 deletions

View File

@ -91,6 +91,7 @@ See [List of metrics](https://etcd.io/docs/latest/metrics/) for all metrics per
- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371).
- Add [`etcd_debugging_server_alarms`](https://github.com/etcd-io/etcd/pull/14276).
- Add [`etcd_server_range_duration_seconds`](https://github.com/etcd-io/etcd/pull/17983).
### Go
- Require [Go 1.22+](https://github.com/etcd-io/etcd/pull/16594).

View File

@ -39,13 +39,29 @@ var (
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
},
[]string{"version", "op", "success"})
// rangeSec is the histogram of range latencies in seconds, partitioned by
// the "success" label.
rangeSec = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "range_duration_seconds",
		Subsystem: "server",
		Namespace: "etcd",
		Help:      "The latency distributions of txn.Range",

		// Twenty exponential buckets with factor 2: the smallest upper
		// bound is 0.0001 sec (0.1 ms) and the largest finite upper bound
		// is 0.0001 sec * 2^19 == 52.4288 sec.
		Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
	},
	[]string{"success"},
)
)
// ApplySecObserve records a single latency sample, expressed in seconds, in
// the applySec histogram under the given version, op and success label
// values. The duration is truncated to whole microseconds before it is
// converted to seconds.
func ApplySecObserve(version, op string, success bool, latency time.Duration) {
	successLabel := strconv.FormatBool(success)
	seconds := float64(latency.Microseconds()) / 1e6
	applySec.WithLabelValues(version, op, successLabel).Observe(seconds)
}
// RangeSecObserve records a single latency sample, expressed in seconds, in
// the rangeSec histogram under the "success" label value derived from
// success. The duration is truncated to whole microseconds before it is
// converted to seconds.
func RangeSecObserve(success bool, latency time.Duration) {
	successLabel := strconv.FormatBool(success)
	seconds := float64(latency.Microseconds()) / 1e6
	rangeSec.WithLabelValues(successLabel).Observe(seconds)
}
// init registers this package's collectors with the default Prometheus
// registry. MustRegister panics if any registration fails; the collectors
// are registered in the order listed.
func init() {
	prometheus.MustRegister(
		applySec,
		rangeSec,
		slowApplies,
	)
}

View File

@ -0,0 +1,62 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package txn
import (
"strings"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
)
func TestRangeSecObserve(t *testing.T) {
// Simulate a range operation taking 500 milliseconds.
latency := 500 * time.Millisecond
RangeSecObserve(true, latency)
// Use testutil to collect the results and check against expected value
expected := `
# HELP etcd_server_range_duration_seconds The latency distributions of txn.Range
# TYPE etcd_server_range_duration_seconds histogram
etcd_server_range_duration_seconds_bucket{success="true",le="0.0001"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0002"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0004"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0008"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0016"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0032"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0064"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0128"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0256"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0512"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.1024"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.2048"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.4096"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.8192"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="1.6384"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="3.2768"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="6.5536"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="13.1072"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="26.2144"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="52.4288"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="+Inf"} 1
etcd_server_range_duration_seconds_sum{success="true"} 0.5
etcd_server_range_duration_seconds_count{success="true"} 1
`
err := testutil.CollectAndCompare(rangeSec, strings.NewReader(expected))
require.NoError(t, err, "Collected metrics did not match expected metrics: %v", err)
}

View File

@ -19,6 +19,7 @@ import (
"context"
"fmt"
"sort"
"time"
"go.uber.org/zap"
@ -138,6 +139,10 @@ func Range(ctx context.Context, lg *zap.Logger, kv mvcc.KV, r *pb.RangeRequest)
trace = traceutil.New("range", lg)
ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
}
defer func(start time.Time) {
success := err == nil
RangeSecObserve(success, time.Since(start))
}(time.Now())
txnRead := kv.Read(mvcc.ConcurrentReadTxMode, trace)
defer txnRead.End()
resp, err = executeRange(ctx, lg, txnRead, r)

View File

@ -22,10 +22,14 @@ import (
"testing"
"time"
"github.com/stretchr/testify/require"
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
"go.etcd.io/etcd/client/pkg/v3/transport"
"go.etcd.io/etcd/server/v3/storage"
"go.etcd.io/etcd/tests/v3/framework/integration"
clientv3 "go.etcd.io/etcd/client/v3"
)
// TestMetricDbSizeBoot checks that the db size metric is set on boot.
@ -211,3 +215,33 @@ func TestMetricsHealth(t *testing.T) {
t.Fatalf("expected '0' from etcd_server_health_failures, got %q", hv)
}
}
// TestMetricsRangeDurationSeconds starts a single-member cluster, writes a
// few keys, performs a read, and then checks that the member reports a
// numeric value within [0, 600] for etcd_server_range_duration_seconds.
func TestMetricsRangeDurationSeconds(t *testing.T) {
	integration.BeforeTest(t)

	clus := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	cli := clus.RandClient()
	ctx := context.Background()

	// Seed some data so the read below has something to return.
	for _, k := range []string{"my-namespace/foobar", "my-namespace/foobar1", "namespace/foobar1"} {
		_, putErr := cli.Put(ctx, k, "data")
		require.NoError(t, putErr)
	}

	// Perform a read; the metric checked below is expected to be populated
	// by it.
	_, err := cli.Get(ctx, "", clientv3.WithFromKey())
	require.NoError(t, err)

	// NOTE(review): the name passed here is the histogram family name, which
	// is a prefix of its _bucket, _sum and _count series. Confirm which of
	// those Metric actually returns, and whether "duration" is the right
	// interpretation of the parsed value.
	raw, err := clus.Members[0].Metric("etcd_server_range_duration_seconds")
	require.NoError(t, err)
	require.NotEmpty(t, raw, "expected a number from etcd_server_range_duration_seconds")

	value, err := strconv.ParseFloat(raw, 64)
	require.NoError(t, err, "failed to parse duration: %s", err)

	// Sanity bounds only: the value must be non-negative and at most 600.
	upperBound := 600.0
	require.GreaterOrEqual(t, value, 0.0, "expected etcd_server_range_duration_seconds to be between 0 and %f, got %f", upperBound, value)
	require.LessOrEqual(t, value, upperBound, "expected etcd_server_range_duration_seconds to be between 0 and %f, got %f", upperBound, value)
}