diff --git a/server/embed/etcd.go b/server/embed/etcd.go index e955efd92..becdc666f 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -702,7 +702,7 @@ func (e *Etcd) serveClients() (err error) { // Start a client server goroutine for each listen address mux := http.NewServeMux() etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server) - etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, mux, e.Server) + etcdhttp.HandleMetricsHealth(e.cfg.logger, mux, e.Server) gopts := []grpc.ServerOption{} if e.cfg.GRPCKeepAliveMinTime > time.Duration(0) { @@ -735,7 +735,7 @@ func (e *Etcd) serveMetrics() (err error) { if len(e.cfg.ListenMetricsUrls) > 0 { metricsMux := http.NewServeMux() - etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, metricsMux, e.Server) + etcdhttp.HandleMetricsHealth(e.cfg.logger, metricsMux, e.Server) for _, murl := range e.cfg.ListenMetricsUrls { tlsInfo := &e.cfg.ClientTLSInfo diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index 54e76552d..5b84a03bc 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -19,14 +19,15 @@ import ( "encoding/json" "fmt" "net/http" - "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.etcd.io/etcd/api/v3/etcdserverpb" + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/auth" - "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/config" "go.uber.org/zap" ) @@ -37,23 +38,16 @@ const ( PathProxyHealth = "/proxy/health" ) -// HandleMetricsHealth registers metrics and health handlers. -func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) { - mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { - if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { - return h - } - if h := checkLeader(lg, srv, serializable); h.Health != "true" { - return h - } - return checkV2API(lg, srv) - })) +type ServerHealth interface { + Alarms() []*pb.AlarmMember + Leader() types.ID + Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error) + Config() config.ServerConfig } -// HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request +// HandleMetricsHealth registers metrics and health handlers. it checks health by using v3 range request // and its corresponding timeout. -func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) { +func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) { mux.Handle(PathMetrics, promhttp.Handler()) mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { @@ -62,7 +56,7 @@ func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserve if h := checkLeader(lg, srv, serializable); h.Health != "true" { return h } - return checkV3API(lg, srv, serializable) + return checkAPI(lg, srv, serializable) })) } @@ -155,7 +149,7 @@ func getSerializableFlag(r *http.Request) bool { // TODO: etcdserver.ErrNoLeader in health API -func checkAlarms(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { +func checkAlarms(lg *zap.Logger, srv ServerHealth, excludedAlarms AlarmSet) Health { h := Health{Health: "true"} as := srv.Alarms() if len(as) > 0 { @@ -183,7 +177,7 @@ func checkAlarms(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe return h } -func checkLeader(lg *zap.Logger, srv etcdserver.ServerV2, serializable bool) Health { +func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health { h := Health{Health: "true"} if !serializable && (uint64(srv.Leader()) == raft.None) { h.Health = "false" @@ -193,24 +187,10 @@ func checkLeader(lg *zap.Logger, srv etcdserver.ServerV2, serializable bool) Hea return h } -func checkV2API(lg *zap.Logger, srv etcdserver.ServerV2) Health { +func checkAPI(lg *zap.Logger, srv ServerHealth, serializable bool) Health { h := Health{Health: "true"} - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) - cancel() - if err != nil { - h.Health = "false" - h.Reason = fmt.Sprintf("QGET ERROR:%s", err) - lg.Warn("serving /health false; QGET fails", zap.Error(err)) - return h - } - lg.Debug("serving /health true") - return h -} - -func checkV3API(lg *zap.Logger, srv *etcdserver.EtcdServer, serializable bool) Health { - h := Health{Health: "true"} - ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout()) + cfg := srv.Config() + ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout()) _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) cancel() if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/metrics_test.go index bd4f156df..788a038aa 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/metrics_test.go @@ -13,6 +13,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/raft/v3" + "go.etcd.io/etcd/server/v3/auth" + "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver" "go.uber.org/zap/zaptest" ) @@ -23,24 +25,33 @@ func (s *fakeStats) SelfStats() []byte { return nil } func (s *fakeStats) LeaderStats() []byte { return nil } func (s *fakeStats) StoreStats() []byte { return nil } -type fakeServerV2 struct { +type fakeHealthServer struct { fakeServer - health string + health string + apiError error } -func (s *fakeServerV2) Leader() types.ID { +func (s *fakeHealthServer) Range(ctx context.Context, request *pb.RangeRequest) (*pb.RangeResponse, error) { + return nil, s.apiError +} + +func (s *fakeHealthServer) Config() config.ServerConfig { + return config.ServerConfig{} +} + +func (s *fakeHealthServer) Leader() types.ID { if s.health == "true" { return 1 } return types.ID(raft.None) } -func (s *fakeServerV2) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { +func (s *fakeHealthServer) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { if s.health == "true" { return etcdserver.Response{}, nil } return etcdserver.Response{}, fmt.Errorf("fail health check") } -func (s *fakeServerV2) ClientCertAuthEnabled() bool { return false } +func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false } func TestHealthHandler(t *testing.T) { // define the input and expected output @@ -49,6 +60,7 @@ func TestHealthHandler(t *testing.T) { name string alarms []*pb.AlarmMember healthCheckURL string + apiError error expectStatusCode int expectHealth string @@ -102,14 +114,33 @@ func TestHealthHandler(t *testing.T) { expectStatusCode: http.StatusOK, expectHealth: "true", }, + { + healthCheckURL: "/health", + apiError: auth.ErrUserEmpty, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: auth.ErrPermissionDenied, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: fmt.Errorf("Unexpected error"), + expectStatusCode: http.StatusServiceUnavailable, + expectHealth: "false", + }, } for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() - HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeServerV2{ + HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{ fakeServer: fakeServer{alarms: tt.alarms}, health: tt.expectHealth, + apiError: tt.apiError, }) ts := httptest.NewServer(mux) defer ts.Close() diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index c79055feb..3c50719bc 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -445,6 +445,10 @@ func (s *EtcdServer) Logger() *zap.Logger { return l } +func (s *EtcdServer) Config() config.ServerConfig { + return s.Cfg +} + func tickToDur(ticks int, tickMs uint) string { return fmt.Sprintf("%v", time.Duration(ticks)*time.Duration(tickMs)*time.Millisecond) } diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 26fe39daf..51c5525da 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -993,7 +993,7 @@ func (m *Member) Launch() error { for _, ln := range m.ClientListeners { handler := http.NewServeMux() etcdhttp.HandleBasic(m.Logger, handler, m.Server) - etcdhttp.HandleMetricsHealthForV3(m.Logger, handler, m.Server) + etcdhttp.HandleMetricsHealth(m.Logger, handler, m.Server) hs := &httptest.Server{ Listener: ln, Config: &http.Server{