diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go
index bec5e9e05..4d058e467 100644
--- a/etcdserver/api/etcdhttp/metrics.go
+++ b/etcdserver/api/etcdhttp/metrics.go
@@ -24,6 +24,7 @@ import (
 	"go.etcd.io/etcd/etcdserver/etcdserverpb"
 	"go.etcd.io/etcd/raft"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
 
@@ -43,11 +44,6 @@ func HandlePrometheus(mux *http.ServeMux) {
 	mux.Handle(pathMetrics, promhttp.Handler())
 }
 
-// HandleHealth registers health handler on '/health'.
-func HandleHealth(mux *http.ServeMux, srv etcdserver.ServerV2) {
-	mux.Handle(PathHealth, NewHealthHandler(func() Health { return checkHealth(srv) }))
-}
-
 // NewHealthHandler handles '/health' requests.
 func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
@@ -67,6 +63,28 @@ func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
 	}
 }
 
+// healthSuccess and healthFailed count the results of checkHealth calls.
+var (
+	healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{
+		Namespace: "etcd",
+		Subsystem: "server",
+		Name:      "health_success",
+		Help:      "The total number of successful health checks",
+	})
+	healthFailed = prometheus.NewCounter(prometheus.CounterOpts{
+		Namespace: "etcd",
+		Subsystem: "server",
+		Name:      "health_failures",
+		Help:      "The total number of failed health checks",
+	})
+)
+
+// init registers the health check counters with the default Prometheus registerer.
+func init() {
+	prometheus.MustRegister(healthSuccess)
+	prometheus.MustRegister(healthFailed)
+}
+
 // Health defines etcd server health status.
 // TODO: remove manual parsing in etcdctl cluster-health
 type Health struct {
@@ -97,5 +115,12 @@ func checkHealth(srv etcdserver.ServerV2) Health {
 			h.Health = "false"
 		}
 	}
+
+	// Record the outcome of this check in the matching counter.
+	if h.Health == "true" {
+		healthSuccess.Inc()
+	} else {
+		healthFailed.Inc()
+	}
 	return h
 }
diff --git a/integration/metrics_test.go b/integration/metrics_test.go
index 70dcf7b03..207e437cb 100644
--- a/integration/metrics_test.go
+++ b/integration/metrics_test.go
@@ -16,14 +16,15 @@ package integration
 
 import (
 	"context"
+	"net/http"
 	"strconv"
 	"testing"
 	"time"
 
 	"go.etcd.io/etcd/etcdserver"
-
 	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
 	"go.etcd.io/etcd/pkg/testutil"
+	"go.etcd.io/etcd/pkg/transport"
 )
 
 // TestMetricDbSizeBoot checks that the db size metric is set on boot.
@@ -165,3 +166,36 @@ func TestMetricQuotaBackendBytes(t *testing.T) {
 		t.Fatalf("expected %d, got %f", etcdserver.DefaultQuotaBytes, qv)
 	}
 }
+
+// TestMetricsHealth checks that one '/health' request against a single-member
+// cluster leaves the etcd_server_health_success metric at "1".
+func TestMetricsHealth(t *testing.T) {
+	defer testutil.AfterTest(t)
+	clus := NewClusterV3(t, &ClusterConfig{Size: 1})
+	defer clus.Terminate(t)
+
+	tr, err := transport.NewTransport(transport.TLSInfo{}, 5*time.Second)
+	if err != nil {
+		t.Fatal(err)
+	}
+	u := clus.Members[0].ClientURLs[0]
+	u.Path = "/health"
+	resp, err := tr.RoundTrip(&http.Request{
+		Header: make(http.Header),
+		Method: http.MethodGet,
+		URL:    &u,
+	})
+	// Check the error before touching resp: it is nil when the round trip fails.
+	if err != nil {
+		t.Fatal(err)
+	}
+	resp.Body.Close()
+
+	hv, err := clus.Members[0].Metric("etcd_server_health_success")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hv != "1" {
+		t.Fatalf("expected '1' from /health, got %q", hv)
+	}
+}