From cc44646a2ecaf0f12ff583d978c7b8b4c769b925 Mon Sep 17 00:00:00 2001 From: Siyuan Zhang Date: Tue, 12 Dec 2023 15:39:09 -0800 Subject: [PATCH] server: Cover V3 health with tests Signed-off-by: Siyuan Zhang --- embed/etcd.go | 1 + etcdserver/api/etcdhttp/base.go | 1 - etcdserver/api/etcdhttp/metrics.go | 41 +++++++++++++++--------- etcdserver/api/etcdhttp/metrics_test.go | 42 +++++++++++++++++++++---- etcdserver/api/v2http/client.go | 1 + etcdserver/server.go | 4 +++ 6 files changed, 69 insertions(+), 21 deletions(-) diff --git a/embed/etcd.go b/embed/etcd.go index 223a8aaea..29f65424b 100644 --- a/embed/etcd.go +++ b/embed/etcd.go @@ -773,6 +773,7 @@ func (e *Etcd) serveClients() (err error) { } else { mux := http.NewServeMux() etcdhttp.HandleBasic(mux, e.Server) + etcdhttp.HandleMetricsHealth(mux, e.Server) h = mux } diff --git a/etcdserver/api/etcdhttp/base.go b/etcdserver/api/etcdhttp/base.go index c9df62ea8..4ff4b9b02 100644 --- a/etcdserver/api/etcdhttp/base.go +++ b/etcdserver/api/etcdhttp/base.go @@ -51,7 +51,6 @@ func HandleBasic(mux *http.ServeMux, server etcdserver.ServerPeer) { // TODO: deprecate '/config/local/log' in v3.5 mux.HandleFunc(configPath+"/local/log", logHandleFunc) - HandleMetricsHealth(mux, server) mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion)) } diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go index 4c319eb8c..15a4917f2 100644 --- a/etcdserver/api/etcdhttp/metrics.go +++ b/etcdserver/api/etcdhttp/metrics.go @@ -23,7 +23,8 @@ import ( "go.etcd.io/etcd/auth" "go.etcd.io/etcd/etcdserver" - "go.etcd.io/etcd/etcdserver/etcdserverpb" + pb "go.etcd.io/etcd/etcdserver/etcdserverpb" + "go.etcd.io/etcd/pkg/types" "go.etcd.io/etcd/raft" "github.com/prometheus/client_golang/prometheus" @@ -35,8 +36,19 @@ const ( PathHealth = "/health" ) -// HandleMetricsHealth registers metrics and health handlers. -func HandleMetricsHealth(mux *http.ServeMux, srv etcdserver.ServerV2) { +type ServerHealth interface { + serverHealthV2V3 + Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error) + Config() etcdserver.ServerConfig +} + +type serverHealthV2V3 interface { + Alarms() []*pb.AlarmMember + Leader() types.ID +} + +// HandleMetricsHealthForV2 registers metrics and health handlers for v2. +func HandleMetricsHealthForV2(mux *http.ServeMux, srv etcdserver.ServerV2) { mux.Handle(PathMetrics, promhttp.Handler()) mux.Handle(PathHealth, NewHealthHandler(func(excludedAlarms AlarmSet, serializable bool) Health { if h := checkAlarms(srv, excludedAlarms); h.Health != "true" { @@ -49,9 +61,9 @@ func HandleMetricsHealth(mux *http.ServeMux, srv etcdserver.ServerV2) { })) } -// HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request +// HandleMetricsHealth registers metrics and health handlers. it checks health by using v3 range request // and its corresponding timeout. -func HandleMetricsHealthForV3(mux *http.ServeMux, srv *etcdserver.EtcdServer) { +func HandleMetricsHealth(mux *http.ServeMux, srv ServerHealth) { mux.Handle(PathMetrics, promhttp.Handler()) mux.Handle(PathHealth, NewHealthHandler(func(excludedAlarms AlarmSet, serializable bool) Health { if h := checkAlarms(srv, excludedAlarms); h.Health != "true" { @@ -60,7 +72,7 @@ func HandleMetricsHealthForV3(mux *http.ServeMux, srv *etcdserver.EtcdServer) { if h := checkLeader(srv, serializable); h.Health != "true" { return h } - return checkV3API(srv, serializable) + return checkAPI(srv, serializable) })) } @@ -152,7 +164,7 @@ func getSerializableFlag(r *http.Request) bool { // TODO: etcdserver.ErrNoLeader in health API -func checkAlarms(srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { +func checkAlarms(srv serverHealthV2V3, excludedAlarms AlarmSet) Health { h := Health{Health: "true"} as := srv.Alarms() if len(as) > 0 { @@ -165,9 +177,9 @@ func checkAlarms(srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { h.Health = "false" switch v.Alarm { - case etcdserverpb.AlarmType_NOSPACE: + case pb.AlarmType_NOSPACE: h.Reason = "ALARM NOSPACE" - case etcdserverpb.AlarmType_CORRUPT: + case pb.AlarmType_CORRUPT: h.Reason = "ALARM CORRUPT" default: h.Reason = "ALARM UNKNOWN" @@ -180,7 +192,7 @@ func checkAlarms(srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { return h } -func checkLeader(srv etcdserver.ServerV2, serializable bool) Health { +func checkLeader(srv serverHealthV2V3, serializable bool) Health { h := Health{Health: "true"} if !serializable && (uint64(srv.Leader()) == raft.None) { h.Health = "false" @@ -193,7 +205,7 @@ func checkLeader(srv etcdserver.ServerV2, serializable bool) Health { func checkV2API(srv etcdserver.ServerV2) Health { h := Health{Health: "true"} ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) + _, err := srv.Do(ctx, pb.Request{Method: "QGET"}) cancel() if err != nil { h.Health = "false" @@ -204,10 +216,11 @@ func checkV2API(srv etcdserver.ServerV2) Health { return h } -func checkV3API(srv *etcdserver.EtcdServer, serializable bool) Health { +func checkAPI(srv ServerHealth, serializable bool) Health { h := Health{Health: "true"} - ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout()) - _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) + cfg := srv.Config() + ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout()) + _, err := srv.Range(ctx, &pb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) cancel() if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { h.Health = "false" diff --git a/etcdserver/api/etcdhttp/metrics_test.go b/etcdserver/api/etcdhttp/metrics_test.go index 8d825d9ee..0fc658534 100644 --- a/etcdserver/api/etcdhttp/metrics_test.go +++ b/etcdserver/api/etcdhttp/metrics_test.go @@ -24,6 +24,7 @@ import ( "net/http/httptest" "testing" + "go.etcd.io/etcd/auth" "go.etcd.io/etcd/etcdserver" stats "go.etcd.io/etcd/etcdserver/api/v2stats" pb "go.etcd.io/etcd/etcdserver/etcdserverpb" @@ -38,25 +39,34 @@ func (s *fakeStats) SelfStats() []byte { return nil } func (s *fakeStats) LeaderStats() []byte { return nil } func (s *fakeStats) StoreStats() []byte { return nil } -type fakeServerV2 struct { +type fakeHealthServer struct { fakeServer stats.Stats - health string + health string + apiError error } -func (s *fakeServerV2) Leader() types.ID { +func (s *fakeHealthServer) Range(ctx context.Context, request *pb.RangeRequest) (*pb.RangeResponse, error) { + return nil, s.apiError +} + +func (s *fakeHealthServer) Config() etcdserver.ServerConfig { + return etcdserver.ServerConfig{} +} + +func (s *fakeHealthServer) Leader() types.ID { if s.health == "true" { return 1 } return types.ID(raft.None) } -func (s *fakeServerV2) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { +func (s *fakeHealthServer) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { if s.health == "true" { return etcdserver.Response{}, nil } return etcdserver.Response{}, fmt.Errorf("fail health check") } -func (s *fakeServerV2) ClientCertAuthEnabled() bool { return false } +func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false } func TestHealthHandler(t *testing.T) { // define the input and expected output @@ -65,6 +75,7 @@ func TestHealthHandler(t *testing.T) { name string alarms []*pb.AlarmMember healthCheckURL string + apiError error expectStatusCode int expectHealth string @@ -118,15 +129,34 @@ func TestHealthHandler(t *testing.T) { expectStatusCode: http.StatusOK, expectHealth: "true", }, + { + healthCheckURL: "/health", + apiError: auth.ErrUserEmpty, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: auth.ErrPermissionDenied, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: fmt.Errorf("Unexpected error"), + expectStatusCode: http.StatusServiceUnavailable, + expectHealth: "false", + }, } for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() - HandleMetricsHealth(mux, &fakeServerV2{ + HandleMetricsHealth(mux, &fakeHealthServer{ fakeServer: fakeServer{alarms: tt.alarms}, Stats: &fakeStats{}, health: tt.expectHealth, + apiError: tt.apiError, }) ts := httptest.NewServer(mux) defer ts.Close() diff --git a/etcdserver/api/v2http/client.go b/etcdserver/api/v2http/client.go index 1d1e592b2..a26e748fe 100644 --- a/etcdserver/api/v2http/client.go +++ b/etcdserver/api/v2http/client.go @@ -55,6 +55,7 @@ const ( func NewClientHandler(lg *zap.Logger, server etcdserver.ServerPeer, timeout time.Duration) http.Handler { mux := http.NewServeMux() etcdhttp.HandleBasic(mux, server) + etcdhttp.HandleMetricsHealthForV2(mux, server) handleV2(lg, mux, server, timeout) return requestLogger(lg, mux) } diff --git a/etcdserver/server.go b/etcdserver/server.go index d24963f9d..c044e3d44 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -667,6 +667,10 @@ func (s *EtcdServer) getLogger() *zap.Logger { return l } +func (s *EtcdServer) Config() ServerConfig { + return s.Cfg +} + func tickToDur(ticks int, tickMs uint) string { return fmt.Sprintf("%v", time.Duration(ticks)*time.Duration(tickMs)*time.Millisecond) }