From 0096d2ecdb2c753d7150e48681769583a937da7b Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 14:14:54 +0100 Subject: [PATCH 1/9] server: Remove unused NewClientHandler --- server/etcdserver/api/etcdhttp/base.go | 2 +- .../httptypes => etcdhttp/types}/errors.go | 0 .../types}/errors_test.go | 0 server/etcdserver/api/v2http/client.go | 50 ------------------- server/etcdserver/server.go | 2 +- server/proxy/httpproxy/reverse.go | 2 +- tests/framework/integration/cluster.go | 10 ++-- 7 files changed, 7 insertions(+), 59 deletions(-) rename server/etcdserver/api/{v2http/httptypes => etcdhttp/types}/errors.go (100%) rename server/etcdserver/api/{v2http/httptypes => etcdhttp/types}/errors_test.go (100%) delete mode 100644 server/etcdserver/api/v2http/client.go diff --git a/server/etcdserver/api/etcdhttp/base.go b/server/etcdserver/api/etcdhttp/base.go index 06067cc44..9dc313fc1 100644 --- a/server/etcdserver/api/etcdhttp/base.go +++ b/server/etcdserver/api/etcdhttp/base.go @@ -23,8 +23,8 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/server/v3/etcdserver" "go.etcd.io/etcd/server/v3/etcdserver/api" + "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" "go.etcd.io/etcd/server/v3/etcdserver/api/v2error" - "go.etcd.io/etcd/server/v3/etcdserver/api/v2http/httptypes" "go.uber.org/zap" ) diff --git a/server/etcdserver/api/v2http/httptypes/errors.go b/server/etcdserver/api/etcdhttp/types/errors.go similarity index 100% rename from server/etcdserver/api/v2http/httptypes/errors.go rename to server/etcdserver/api/etcdhttp/types/errors.go diff --git a/server/etcdserver/api/v2http/httptypes/errors_test.go b/server/etcdserver/api/etcdhttp/types/errors_test.go similarity index 100% rename from server/etcdserver/api/v2http/httptypes/errors_test.go rename to server/etcdserver/api/etcdhttp/types/errors_test.go diff --git a/server/etcdserver/api/v2http/client.go b/server/etcdserver/api/v2http/client.go deleted file mode 100644 index 7b56da357..000000000 --- a/server/etcdserver/api/v2http/client.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package v2http provides etcd client and server implementations. -package v2http - -import ( - "net/http" - "time" - - "go.etcd.io/etcd/server/v3/etcdserver" - "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp" - "go.uber.org/zap" -) - -// NewClientHandler generates a muxed http.Handler with the given parameters to serve etcd client requests. -func NewClientHandler(lg *zap.Logger, server etcdserver.ServerPeer, timeout time.Duration) http.Handler { - if lg == nil { - lg = zap.NewNop() - } - mux := http.NewServeMux() - etcdhttp.HandleBasic(lg, mux, server) - etcdhttp.HandleMetricsHealth(lg, mux, server) - return requestLogger(lg, mux) -} - -func requestLogger(lg *zap.Logger, handler http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if lg != nil { - lg.Debug( - "handling HTTP request", - zap.String("method", r.Method), - zap.String("request-uri", r.RequestURI), - zap.String("remote-addr", r.RemoteAddr), - ) - } - handler.ServeHTTP(w, r) - }) -} diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 1c8fd317e..c79055feb 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -51,10 +51,10 @@ import ( "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/etcdserver/api" + "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" - "go.etcd.io/etcd/server/v3/etcdserver/api/v2http/httptypes" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" "go.etcd.io/etcd/server/v3/etcdserver/api/v3alarm" diff --git a/server/proxy/httpproxy/reverse.go b/server/proxy/httpproxy/reverse.go index 95a7e653f..0db63f938 100644 --- a/server/proxy/httpproxy/reverse.go +++ b/server/proxy/httpproxy/reverse.go @@ -26,7 +26,7 @@ import ( "sync/atomic" "time" - "go.etcd.io/etcd/server/v3/etcdserver/api/v2http/httptypes" + "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" "go.uber.org/zap" ) diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 594aafa4a..26fe39daf 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -47,7 +47,6 @@ import ( "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" - "go.etcd.io/etcd/server/v3/etcdserver/api/v2http" "go.etcd.io/etcd/server/v3/etcdserver/api/v3client" "go.etcd.io/etcd/server/v3/etcdserver/api/v3election" epb "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb" @@ -992,14 +991,13 @@ func (m *Member) Launch() error { m.ServerClosers = append(m.ServerClosers, closer) } for _, ln := range m.ClientListeners { + handler := http.NewServeMux() + etcdhttp.HandleBasic(m.Logger, handler, m.Server) + etcdhttp.HandleMetricsHealthForV3(m.Logger, handler, m.Server) hs := &httptest.Server{ Listener: ln, Config: &http.Server{ - Handler: v2http.NewClientHandler( - m.Logger, - m.Server, - m.ServerConfig.ReqTimeout(), - ), + Handler: handler, ErrorLog: log.New(io.Discard, "net/http", 0), }, } From 0fb194d6f29135358c7de6f02b85a26fc40caf73 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 18:25:39 +0100 Subject: [PATCH 2/9] server: Use named struct initialization in healthcheck test --- .../etcdserver/api/etcdhttp/metrics_test.go | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/metrics_test.go index 9c07cca59..d365b801c 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/metrics_test.go @@ -52,46 +52,46 @@ func TestHealthHandler(t *testing.T) { health string }{ { - []*pb.AlarmMember{}, - "/health", - http.StatusOK, - "true", + alarms: []*pb.AlarmMember{}, + healthCheckURL: "/health", + statusCode: http.StatusOK, + health: "true", }, { - []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, - "/health", - http.StatusServiceUnavailable, - "false", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health", + statusCode: http.StatusServiceUnavailable, + health: "false", }, { - []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, - "/health?exclude=NOSPACE", - http.StatusOK, - "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health?exclude=NOSPACE", + statusCode: http.StatusOK, + health: "true", }, { - []*pb.AlarmMember{}, - "/health?exclude=NOSPACE", - http.StatusOK, - "true", + alarms: []*pb.AlarmMember{}, + healthCheckURL: "/health?exclude=NOSPACE", + statusCode: http.StatusOK, + health: "true", }, { - []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}}, - "/health?exclude=NOSPACE", - http.StatusOK, - "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health?exclude=NOSPACE", + statusCode: http.StatusOK, + health: "true", }, { - []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, - "/health?exclude=NOSPACE", - http.StatusServiceUnavailable, - "false", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, + healthCheckURL: "/health?exclude=NOSPACE", + statusCode: http.StatusServiceUnavailable, + health: "false", }, { - []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, - "/health?exclude=NOSPACE&exclude=CORRUPT", - http.StatusOK, - "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, + healthCheckURL: "/health?exclude=NOSPACE&exclude=CORRUPT", + statusCode: http.StatusOK, + health: "true", }, } From e4e391792ade576001fec9ef286fce5e70febd2f Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 18:33:41 +0100 Subject: [PATCH 3/9] server: Rename test case expect fields --- .../etcdserver/api/etcdhttp/metrics_test.go | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/metrics_test.go index d365b801c..a36cc96c3 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/metrics_test.go @@ -48,50 +48,51 @@ func TestHealthHandler(t *testing.T) { tests := []struct { alarms []*pb.AlarmMember healthCheckURL string - statusCode int - health string + + expectStatusCode int + expectHealth string }{ { - alarms: []*pb.AlarmMember{}, - healthCheckURL: "/health", - statusCode: http.StatusOK, - health: "true", + alarms: []*pb.AlarmMember{}, + healthCheckURL: "/health", + expectStatusCode: http.StatusOK, + expectHealth: "true", }, { - alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, - healthCheckURL: "/health", - statusCode: http.StatusServiceUnavailable, - health: "false", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health", + expectStatusCode: http.StatusServiceUnavailable, + expectHealth: "false", }, { - alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, - healthCheckURL: "/health?exclude=NOSPACE", - statusCode: http.StatusOK, - health: "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health?exclude=NOSPACE", + expectStatusCode: http.StatusOK, + expectHealth: "true", }, { - alarms: []*pb.AlarmMember{}, - healthCheckURL: "/health?exclude=NOSPACE", - statusCode: http.StatusOK, - health: "true", + alarms: []*pb.AlarmMember{}, + healthCheckURL: "/health?exclude=NOSPACE", + expectStatusCode: http.StatusOK, + expectHealth: "true", }, { - alarms: []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}}, - healthCheckURL: "/health?exclude=NOSPACE", - statusCode: http.StatusOK, - health: "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}}, + healthCheckURL: "/health?exclude=NOSPACE", + expectStatusCode: http.StatusOK, + expectHealth: "true", }, { - alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, - healthCheckURL: "/health?exclude=NOSPACE", - statusCode: http.StatusServiceUnavailable, - health: "false", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, + healthCheckURL: "/health?exclude=NOSPACE", + expectStatusCode: http.StatusServiceUnavailable, + expectHealth: "false", }, { - alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, - healthCheckURL: "/health?exclude=NOSPACE&exclude=CORRUPT", - statusCode: http.StatusOK, - health: "true", + alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, + healthCheckURL: "/health?exclude=NOSPACE&exclude=CORRUPT", + expectStatusCode: http.StatusOK, + expectHealth: "true", }, } @@ -100,7 +101,7 @@ func TestHealthHandler(t *testing.T) { mux := http.NewServeMux() HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeServerV2{ fakeServer: fakeServer{alarms: tt.alarms}, - health: tt.health, + health: tt.expectHealth, }) ts := httptest.NewServer(mux) defer ts.Close() @@ -113,15 +114,15 @@ func TestHealthHandler(t *testing.T) { t.Errorf("got nil http response with http request %s in test case #%d", tt.healthCheckURL, i+1) return } - if res.StatusCode != tt.statusCode { - t.Errorf("want statusCode %d but got %d in test case #%d", tt.statusCode, res.StatusCode, i+1) + if res.StatusCode != tt.expectStatusCode { + t.Errorf("want statusCode %d but got %d in test case #%d", tt.expectStatusCode, res.StatusCode, i+1) } health, err := parseHealthOutput(res.Body) if err != nil { t.Errorf("fail parse health check output %v", err) } - if health.Health != tt.health { - t.Errorf("want health %s but got %s", tt.health, health.Health) + if health.Health != tt.expectHealth { + t.Errorf("want health %s but got %s", tt.expectHealth, health.Health) } }() } From 191aed645ef003891fd5f492b27b4d3eb510d847 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 18:46:44 +0100 Subject: [PATCH 4/9] server: Run health check tests in subtests --- server/etcdserver/api/etcdhttp/metrics_test.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/metrics_test.go index a36cc96c3..bd4f156df 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/metrics_test.go @@ -46,6 +46,7 @@ func TestHealthHandler(t *testing.T) { // define the input and expected output // input: alarms, and healthCheckURL tests := []struct { + name string alarms []*pb.AlarmMember healthCheckURL string @@ -53,42 +54,49 @@ func TestHealthHandler(t *testing.T) { expectHealth string }{ { + name: "Healthy if no alarm", alarms: []*pb.AlarmMember{}, healthCheckURL: "/health", expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Unhealthy if NOSPACE alarm is on", alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, healthCheckURL: "/health", expectStatusCode: http.StatusServiceUnavailable, expectHealth: "false", }, { + name: "Healthy if NOSPACE alarm is on and excluded", alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}}, healthCheckURL: "/health?exclude=NOSPACE", expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Healthy if NOSPACE alarm is excluded", alarms: []*pb.AlarmMember{}, healthCheckURL: "/health?exclude=NOSPACE", expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Healthy if multiple NOSPACE alarms are on and excluded", alarms: []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}}, healthCheckURL: "/health?exclude=NOSPACE", expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Unhealthy if NOSPACE alarms is excluded and CORRUPT is on", alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, healthCheckURL: "/health?exclude=NOSPACE", expectStatusCode: http.StatusServiceUnavailable, expectHealth: "false", }, { + name: "Unhealthy if both NOSPACE and CORRUPT are on and excluded", alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}}, healthCheckURL: "/health?exclude=NOSPACE&exclude=CORRUPT", expectStatusCode: http.StatusOK, @@ -97,7 +105,7 @@ func TestHealthHandler(t *testing.T) { } for i, tt := range tests { - func() { + t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeServerV2{ fakeServer: fakeServer{alarms: tt.alarms}, @@ -124,7 +132,7 @@ func TestHealthHandler(t *testing.T) { if health.Health != tt.expectHealth { t.Errorf("want health %s but got %s", tt.expectHealth, health.Health) } - }() + }) } } From e9dec74ded6c4542a319e93b316161fa771d13e3 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 16:34:06 +0100 Subject: [PATCH 5/9] server: Refactor health checks --- server/etcdserver/api/etcdhttp/metrics.go | 49 ++++++++++++++--------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index fedf2a9e3..54e76552d 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -40,7 +40,15 @@ const ( // HandleMetricsHealth registers metrics and health handlers. func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) { mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { return checkV2Health(lg, srv, excludedAlarms) })) + mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { + if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { + return h + } + if h := checkLeader(lg, srv, serializable); h.Health != "true" { + return h + } + return checkV2API(lg, srv) + })) } // HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request @@ -48,7 +56,13 @@ func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.Serv func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) { mux.Handle(PathMetrics, promhttp.Handler()) mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { - return checkV3Health(lg, srv, excludedAlarms, serializable) + if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { + return h + } + if h := checkLeader(lg, srv, serializable); h.Health != "true" { + return h + } + return checkV3API(lg, srv, serializable) })) } @@ -141,9 +155,8 @@ func getSerializableFlag(r *http.Request) bool { // TODO: etcdserver.ErrNoLeader in health API -func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet, serializable bool) Health { - h := Health{} - h.Health = "true" +func checkAlarms(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { + h := Health{Health: "true"} as := srv.Alarms() if len(as) > 0 { for _, v := range as { @@ -167,19 +180,21 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe } } + return h +} + +func checkLeader(lg *zap.Logger, srv etcdserver.ServerV2, serializable bool) Health { + h := Health{Health: "true"} if !serializable && (uint64(srv.Leader()) == raft.None) { h.Health = "false" h.Reason = "RAFT NO LEADER" lg.Warn("serving /health false; no leader") - return h } return h } -func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) (h Health) { - if h = checkHealth(lg, srv, excludedAlarms, false); h.Health != "true" { - return - } +func checkV2API(lg *zap.Logger, srv etcdserver.ServerV2) Health { + h := Health{Health: "true"} ctx, cancel := context.WithTimeout(context.Background(), time.Second) _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) cancel() @@ -187,16 +202,14 @@ func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms Alarm h.Health = "false" h.Reason = fmt.Sprintf("QGET ERROR:%s", err) lg.Warn("serving /health false; QGET fails", zap.Error(err)) - return + return h } lg.Debug("serving /health true") - return + return h } -func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms AlarmSet, serializable bool) (h Health) { - if h = checkHealth(lg, srv, excludedAlarms, serializable); h.Health != "true" { - return - } +func checkV3API(lg *zap.Logger, srv *etcdserver.EtcdServer, serializable bool) Health { + h := Health{Health: "true"} ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout()) _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) cancel() @@ -204,8 +217,8 @@ func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms Al h.Health = "false" h.Reason = fmt.Sprintf("RANGE ERROR:%s", err) lg.Warn("serving /health false; Range fails", zap.Error(err)) - return + return h } lg.Debug("serving /health true") - return + return h } From 600ee13ac0c38d5b293c861e66748719e5e1d86c Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 18:57:57 +0100 Subject: [PATCH 6/9] server: Cover V3 health with tests --- server/embed/etcd.go | 4 +- server/etcdserver/api/etcdhttp/metrics.go | 52 ++++++------------- .../etcdserver/api/etcdhttp/metrics_test.go | 43 ++++++++++++--- server/etcdserver/server.go | 4 ++ tests/framework/integration/cluster.go | 2 +- 5 files changed, 60 insertions(+), 45 deletions(-) diff --git a/server/embed/etcd.go b/server/embed/etcd.go index e955efd92..becdc666f 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -702,7 +702,7 @@ func (e *Etcd) serveClients() (err error) { // Start a client server goroutine for each listen address mux := http.NewServeMux() etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server) - etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, mux, e.Server) + etcdhttp.HandleMetricsHealth(e.cfg.logger, mux, e.Server) gopts := []grpc.ServerOption{} if e.cfg.GRPCKeepAliveMinTime > time.Duration(0) { @@ -735,7 +735,7 @@ func (e *Etcd) serveMetrics() (err error) { if len(e.cfg.ListenMetricsUrls) > 0 { metricsMux := http.NewServeMux() - etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, metricsMux, e.Server) + etcdhttp.HandleMetricsHealth(e.cfg.logger, metricsMux, e.Server) for _, murl := range e.cfg.ListenMetricsUrls { tlsInfo := &e.cfg.ClientTLSInfo diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index 54e76552d..5b84a03bc 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -19,14 +19,15 @@ import ( "encoding/json" "fmt" "net/http" - "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.etcd.io/etcd/api/v3/etcdserverpb" + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/auth" - "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/config" "go.uber.org/zap" ) @@ -37,23 +38,16 @@ const ( PathProxyHealth = "/proxy/health" ) -// HandleMetricsHealth registers metrics and health handlers. -func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) { - mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { - if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { - return h - } - if h := checkLeader(lg, srv, serializable); h.Health != "true" { - return h - } - return checkV2API(lg, srv) - })) +type ServerHealth interface { + Alarms() []*pb.AlarmMember + Leader() types.ID + Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error) + Config() config.ServerConfig } -// HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request +// HandleMetricsHealth registers metrics and health handlers. it checks health by using v3 range request // and its corresponding timeout. -func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) { +func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) { mux.Handle(PathMetrics, promhttp.Handler()) mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { @@ -62,7 +56,7 @@ func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserve if h := checkLeader(lg, srv, serializable); h.Health != "true" { return h } - return checkV3API(lg, srv, serializable) + return checkAPI(lg, srv, serializable) })) } @@ -155,7 +149,7 @@ func getSerializableFlag(r *http.Request) bool { // TODO: etcdserver.ErrNoLeader in health API -func checkAlarms(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { +func checkAlarms(lg *zap.Logger, srv ServerHealth, excludedAlarms AlarmSet) Health { h := Health{Health: "true"} as := srv.Alarms() if len(as) > 0 { @@ -183,7 +177,7 @@ func checkAlarms(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe return h } -func checkLeader(lg *zap.Logger, srv etcdserver.ServerV2, serializable bool) Health { +func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health { h := Health{Health: "true"} if !serializable && (uint64(srv.Leader()) == raft.None) { h.Health = "false" @@ -193,24 +187,10 @@ func checkLeader(lg *zap.Logger, srv etcdserver.ServerV2, serializable bool) Hea return h } -func checkV2API(lg *zap.Logger, srv etcdserver.ServerV2) Health { +func checkAPI(lg *zap.Logger, srv ServerHealth, serializable bool) Health { h := Health{Health: "true"} - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) - cancel() - if err != nil { - h.Health = "false" - h.Reason = fmt.Sprintf("QGET ERROR:%s", err) - lg.Warn("serving /health false; QGET fails", zap.Error(err)) - return h - } - lg.Debug("serving /health true") - return h -} - -func checkV3API(lg *zap.Logger, srv *etcdserver.EtcdServer, serializable bool) Health { - h := Health{Health: "true"} - ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout()) + cfg := srv.Config() + ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout()) _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) cancel() if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/metrics_test.go index bd4f156df..788a038aa 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/metrics_test.go @@ -13,6 +13,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/raft/v3" + "go.etcd.io/etcd/server/v3/auth" + "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver" "go.uber.org/zap/zaptest" ) @@ -23,24 +25,33 @@ func (s *fakeStats) SelfStats() []byte { return nil } func (s *fakeStats) LeaderStats() []byte { return nil } func (s *fakeStats) StoreStats() []byte { return nil } -type fakeServerV2 struct { +type fakeHealthServer struct { fakeServer - health string + health string + apiError error } -func (s *fakeServerV2) Leader() types.ID { +func (s *fakeHealthServer) Range(ctx context.Context, request *pb.RangeRequest) (*pb.RangeResponse, error) { + return nil, s.apiError +} + +func (s *fakeHealthServer) Config() config.ServerConfig { + return config.ServerConfig{} +} + +func (s *fakeHealthServer) Leader() types.ID { if s.health == "true" { return 1 } return types.ID(raft.None) } -func (s *fakeServerV2) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { +func (s *fakeHealthServer) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) { if s.health == "true" { return etcdserver.Response{}, nil } return etcdserver.Response{}, fmt.Errorf("fail health check") } -func (s *fakeServerV2) ClientCertAuthEnabled() bool { return false } +func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false } func TestHealthHandler(t *testing.T) { // define the input and expected output @@ -49,6 +60,7 @@ func TestHealthHandler(t *testing.T) { name string alarms []*pb.AlarmMember healthCheckURL string + apiError error expectStatusCode int expectHealth string @@ -102,14 +114,33 @@ func TestHealthHandler(t *testing.T) { expectStatusCode: http.StatusOK, expectHealth: "true", }, + { + healthCheckURL: "/health", + apiError: auth.ErrUserEmpty, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: auth.ErrPermissionDenied, + expectStatusCode: http.StatusOK, + expectHealth: "true", + }, + { + healthCheckURL: "/health", + apiError: fmt.Errorf("Unexpected error"), + expectStatusCode: http.StatusServiceUnavailable, + expectHealth: "false", + }, } for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() - HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeServerV2{ + HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{ fakeServer: fakeServer{alarms: tt.alarms}, health: tt.expectHealth, + apiError: tt.apiError, }) ts := httptest.NewServer(mux) defer ts.Close() diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index c79055feb..3c50719bc 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -445,6 +445,10 @@ func (s *EtcdServer) Logger() *zap.Logger { return l } +func (s *EtcdServer) Config() config.ServerConfig { + return s.Cfg +} + func tickToDur(ticks int, tickMs uint) string { return fmt.Sprintf("%v", time.Duration(ticks)*time.Duration(tickMs)*time.Millisecond) } diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 26fe39daf..51c5525da 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -993,7 +993,7 @@ func (m *Member) Launch() error { for _, ln := range m.ClientListeners { handler := http.NewServeMux() etcdhttp.HandleBasic(m.Logger, handler, m.Server) - etcdhttp.HandleMetricsHealthForV3(m.Logger, handler, m.Server) + etcdhttp.HandleMetricsHealth(m.Logger, handler, m.Server) hs := &httptest.Server{ Listener: ln, Config: &http.Server{ From 722ec487df61f436f1565e2e8334ff726017756d Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Thu, 27 Jan 2022 19:04:41 +0100 Subject: [PATCH 7/9] server: Split metrics and health code --- server/embed/etcd.go | 6 +- server/etcdmain/etcd.go | 2 +- server/etcdserver/api/etcdhttp/health.go | 195 ++++++++++++++++++ .../{metrics_test.go => health_test.go} | 2 +- server/etcdserver/api/etcdhttp/metrics.go | 177 +--------------- tests/framework/integration/cluster.go | 3 +- 6 files changed, 205 insertions(+), 180 deletions(-) create mode 100644 server/etcdserver/api/etcdhttp/health.go rename server/etcdserver/api/etcdhttp/{metrics_test.go => health_test.go} (98%) diff --git a/server/embed/etcd.go b/server/embed/etcd.go index becdc666f..cc8fef220 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -702,7 +702,8 @@ func (e *Etcd) serveClients() (err error) { // Start a client server goroutine for each listen address mux := http.NewServeMux() etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server) - etcdhttp.HandleMetricsHealth(e.cfg.logger, mux, e.Server) + etcdhttp.HandleMetrics(mux) + etcdhttp.HandleHealth(e.cfg.logger, mux, e.Server) gopts := []grpc.ServerOption{} if e.cfg.GRPCKeepAliveMinTime > time.Duration(0) { @@ -735,7 +736,8 @@ func (e *Etcd) serveMetrics() (err error) { if len(e.cfg.ListenMetricsUrls) > 0 { metricsMux := http.NewServeMux() - etcdhttp.HandleMetricsHealth(e.cfg.logger, metricsMux, e.Server) + etcdhttp.HandleMetrics(metricsMux) + etcdhttp.HandleHealth(e.cfg.logger, metricsMux, e.Server) for _, murl := range e.cfg.ListenMetricsUrls { tlsInfo := &e.cfg.ClientTLSInfo diff --git a/server/etcdmain/etcd.go b/server/etcdmain/etcd.go index 69828c5fd..eb50558ff 100644 --- a/server/etcdmain/etcd.go +++ b/server/etcdmain/etcd.go @@ -422,7 +422,7 @@ func startProxy(cfg *config) error { go func() { lg.Info("v2 proxy started listening on client requests", zap.String("host", host)) mux := http.NewServeMux() - etcdhttp.HandlePrometheus(mux) // v2 proxy just uses the same port + etcdhttp.HandleMetrics(mux) // v2 proxy just uses the same port mux.Handle("/", ph) lg.Fatal("done serving", zap.Error(http.Serve(l, mux))) }() diff --git a/server/etcdserver/api/etcdhttp/health.go b/server/etcdserver/api/etcdhttp/health.go new file mode 100644 index 000000000..1d29d97be --- /dev/null +++ b/server/etcdserver/api/etcdhttp/health.go @@ -0,0 +1,195 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdhttp + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "go.etcd.io/etcd/api/v3/etcdserverpb" + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/pkg/v3/types" + "go.etcd.io/etcd/raft/v3" + "go.etcd.io/etcd/server/v3/auth" + "go.etcd.io/etcd/server/v3/config" + "go.uber.org/zap" +) + +const ( + PathHealth = "/health" + PathProxyHealth = "/proxy/health" +) + +type ServerHealth interface { + Alarms() []*pb.AlarmMember + Leader() types.ID + Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error) + Config() config.ServerConfig +} + +// HandleHealth registers metrics and health handlers. it checks health by using v3 range request +// and its corresponding timeout. +func HandleHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) { + mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { + if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { + return h + } + if h := checkLeader(lg, srv, serializable); h.Health != "true" { + return h + } + return checkAPI(lg, srv, serializable) + })) +} + +// NewHealthHandler handles '/health' requests. +func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + w.Header().Set("Allow", http.MethodGet) + http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) + lg.Warn("/health error", zap.Int("status-code", http.StatusMethodNotAllowed)) + return + } + excludedAlarms := getExcludedAlarms(r) + // Passing the query parameter "serializable=true" ensures that the + // health of the local etcd is checked vs the health of the cluster. + // This is useful for probes attempting to validate the liveness of + // the etcd process vs readiness of the cluster to serve requests. + serializableFlag := getSerializableFlag(r) + h := hfunc(excludedAlarms, serializableFlag) + defer func() { + if h.Health == "true" { + healthSuccess.Inc() + } else { + healthFailed.Inc() + } + }() + d, _ := json.Marshal(h) + if h.Health != "true" { + http.Error(w, string(d), http.StatusServiceUnavailable) + lg.Warn("/health error", zap.String("output", string(d)), zap.Int("status-code", http.StatusServiceUnavailable)) + return + } + w.WriteHeader(http.StatusOK) + w.Write(d) + lg.Debug("/health OK", zap.Int("status-code", http.StatusOK)) + } +} + +var ( + healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "health_success", + Help: "The total number of successful health checks", + }) + healthFailed = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "health_failures", + Help: "The total number of failed health checks", + }) +) + +func init() { + prometheus.MustRegister(healthSuccess) + prometheus.MustRegister(healthFailed) +} + +// Health defines etcd server health status. +// TODO: remove manual parsing in etcdctl cluster-health +type Health struct { + Health string `json:"health"` + Reason string `json:"reason"` +} + +type AlarmSet map[string]struct{} + +func getExcludedAlarms(r *http.Request) (alarms AlarmSet) { + alarms = make(map[string]struct{}, 2) + alms, found := r.URL.Query()["exclude"] + if found { + for _, alm := range alms { + if len(alm) == 0 { + continue + } + alarms[alm] = struct{}{} + } + } + return alarms +} + +func getSerializableFlag(r *http.Request) bool { + return r.URL.Query().Get("serializable") == "true" +} + +// TODO: etcdserver.ErrNoLeader in health API + +func checkAlarms(lg *zap.Logger, srv ServerHealth, excludedAlarms AlarmSet) Health { + h := Health{Health: "true"} + as := srv.Alarms() + if len(as) > 0 { + for _, v := range as { + alarmName := v.Alarm.String() + if _, found := excludedAlarms[alarmName]; found { + lg.Debug("/health excluded alarm", zap.String("alarm", v.String())) + continue + } + + h.Health = "false" + switch v.Alarm { + case etcdserverpb.AlarmType_NOSPACE: + h.Reason = "ALARM NOSPACE" + case etcdserverpb.AlarmType_CORRUPT: + h.Reason = "ALARM CORRUPT" + default: + h.Reason = "ALARM UNKNOWN" + } + lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String())) + return h + } + } + + return h +} + +func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health { + h := Health{Health: "true"} + if !serializable && (uint64(srv.Leader()) == raft.None) { + h.Health = "false" + h.Reason = "RAFT NO LEADER" + lg.Warn("serving /health false; no leader") + } + return h +} + +func checkAPI(lg *zap.Logger, srv ServerHealth, serializable bool) Health { + h := Health{Health: "true"} + cfg := srv.Config() + ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout()) + _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) + cancel() + if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { + h.Health = "false" + h.Reason = fmt.Sprintf("RANGE ERROR:%s", err) + lg.Warn("serving /health false; Range fails", zap.Error(err)) + return h + } + lg.Debug("serving /health true") + return h +} diff --git a/server/etcdserver/api/etcdhttp/metrics_test.go b/server/etcdserver/api/etcdhttp/health_test.go similarity index 98% rename from server/etcdserver/api/etcdhttp/metrics_test.go rename to server/etcdserver/api/etcdhttp/health_test.go index 788a038aa..7cb30148b 100644 --- a/server/etcdserver/api/etcdhttp/metrics_test.go +++ b/server/etcdserver/api/etcdhttp/health_test.go @@ -137,7 +137,7 @@ func TestHealthHandler(t *testing.T) { for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() - HandleMetricsHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{ + HandleHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{ fakeServer: fakeServer{alarms: tt.alarms}, health: tt.expectHealth, apiError: tt.apiError, diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index 5b84a03bc..bf7d4a4a4 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -15,190 +15,17 @@ package etcdhttp import ( - "context" - "encoding/json" - "fmt" "net/http" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" - "go.etcd.io/etcd/api/v3/etcdserverpb" - pb "go.etcd.io/etcd/api/v3/etcdserverpb" - "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/server/v3/auth" - "go.etcd.io/etcd/server/v3/config" - "go.uber.org/zap" ) const ( PathMetrics = "/metrics" - PathHealth = "/health" PathProxyMetrics = "/proxy/metrics" - PathProxyHealth = "/proxy/health" ) -type ServerHealth interface { - Alarms() []*pb.AlarmMember - Leader() types.ID - Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error) - Config() config.ServerConfig -} - -// HandleMetricsHealth registers metrics and health handlers. it checks health by using v3 range request -// and its corresponding timeout. -func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) { - mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { - if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" { - return h - } - if h := checkLeader(lg, srv, serializable); h.Health != "true" { - return h - } - return checkAPI(lg, srv, serializable) - })) -} - -// HandlePrometheus registers prometheus handler on '/metrics'. -func HandlePrometheus(mux *http.ServeMux) { +// HandleMetrics registers prometheus handler on '/metrics'. +func HandleMetrics(mux *http.ServeMux) { mux.Handle(PathMetrics, promhttp.Handler()) } - -// NewHealthHandler handles '/health' requests. -func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodGet { - w.Header().Set("Allow", http.MethodGet) - http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) - lg.Warn("/health error", zap.Int("status-code", http.StatusMethodNotAllowed)) - return - } - excludedAlarms := getExcludedAlarms(r) - // Passing the query parameter "serializable=true" ensures that the - // health of the local etcd is checked vs the health of the cluster. - // This is useful for probes attempting to validate the liveness of - // the etcd process vs readiness of the cluster to serve requests. - serializableFlag := getSerializableFlag(r) - h := hfunc(excludedAlarms, serializableFlag) - defer func() { - if h.Health == "true" { - healthSuccess.Inc() - } else { - healthFailed.Inc() - } - }() - d, _ := json.Marshal(h) - if h.Health != "true" { - http.Error(w, string(d), http.StatusServiceUnavailable) - lg.Warn("/health error", zap.String("output", string(d)), zap.Int("status-code", http.StatusServiceUnavailable)) - return - } - w.WriteHeader(http.StatusOK) - w.Write(d) - lg.Debug("/health OK", zap.Int("status-code", http.StatusOK)) - } -} - -var ( - healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "etcd", - Subsystem: "server", - Name: "health_success", - Help: "The total number of successful health checks", - }) - healthFailed = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "etcd", - Subsystem: "server", - Name: "health_failures", - Help: "The total number of failed health checks", - }) -) - -func init() { - prometheus.MustRegister(healthSuccess) - prometheus.MustRegister(healthFailed) -} - -// Health defines etcd server health status. -// TODO: remove manual parsing in etcdctl cluster-health -type Health struct { - Health string `json:"health"` - Reason string `json:"reason"` -} - -type AlarmSet map[string]struct{} - -func getExcludedAlarms(r *http.Request) (alarms AlarmSet) { - alarms = make(map[string]struct{}, 2) - alms, found := r.URL.Query()["exclude"] - if found { - for _, alm := range alms { - if len(alm) == 0 { - continue - } - alarms[alm] = struct{}{} - } - } - return alarms -} - -func getSerializableFlag(r *http.Request) bool { - return r.URL.Query().Get("serializable") == "true" -} - -// TODO: etcdserver.ErrNoLeader in health API - -func checkAlarms(lg *zap.Logger, srv ServerHealth, excludedAlarms AlarmSet) Health { - h := Health{Health: "true"} - as := srv.Alarms() - if len(as) > 0 { - for _, v := range as { - alarmName := v.Alarm.String() - if _, found := excludedAlarms[alarmName]; found { - lg.Debug("/health excluded alarm", zap.String("alarm", v.String())) - continue - } - - h.Health = "false" - switch v.Alarm { - case etcdserverpb.AlarmType_NOSPACE: - h.Reason = "ALARM NOSPACE" - case etcdserverpb.AlarmType_CORRUPT: - h.Reason = "ALARM CORRUPT" - default: - h.Reason = "ALARM UNKNOWN" - } - lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String())) - return h - } - } - - return h -} - -func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health { - h := Health{Health: "true"} - if !serializable && (uint64(srv.Leader()) == raft.None) { - h.Health = "false" - h.Reason = "RAFT NO LEADER" - lg.Warn("serving /health false; no leader") - } - return h -} - -func checkAPI(lg *zap.Logger, srv ServerHealth, serializable bool) Health { - h := Health{Health: "true"} - cfg := srv.Config() - ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout()) - _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) - cancel() - if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { - h.Health = "false" - h.Reason = fmt.Sprintf("RANGE ERROR:%s", err) - lg.Warn("serving /health false; Range fails", zap.Error(err)) - return h - } - lg.Debug("serving /health true") - return h -} diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 51c5525da..3dcb4c914 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -993,7 +993,8 @@ func (m *Member) Launch() error { for _, ln := range m.ClientListeners { handler := http.NewServeMux() etcdhttp.HandleBasic(m.Logger, handler, m.Server) - etcdhttp.HandleMetricsHealth(m.Logger, handler, m.Server) + etcdhttp.HandleMetrics(handler) + etcdhttp.HandleHealth(m.Logger, handler, m.Server) hs := &httptest.Server{ Listener: ln, Config: &http.Server{ From fb361e43f069dc3fc33cc348414f059c8b0b2989 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Wed, 2 Feb 2022 13:31:51 +0100 Subject: [PATCH 8/9] server: Split code for debug and version endpoints --- .../api/etcdhttp/{base.go => basic.go} | 72 ++----------------- server/etcdserver/api/etcdhttp/debug.go | 47 ++++++++++++ server/etcdserver/api/etcdhttp/peer.go | 2 +- server/etcdserver/api/etcdhttp/version.go | 61 ++++++++++++++++ 4 files changed, 116 insertions(+), 66 deletions(-) rename server/etcdserver/api/etcdhttp/{base.go => basic.go} (57%) create mode 100644 server/etcdserver/api/etcdhttp/debug.go create mode 100644 server/etcdserver/api/etcdhttp/version.go diff --git a/server/etcdserver/api/etcdhttp/base.go b/server/etcdserver/api/etcdhttp/basic.go similarity index 57% rename from server/etcdserver/api/etcdhttp/base.go rename to server/etcdserver/api/etcdhttp/basic.go index 9dc313fc1..3c2e38a3b 100644 --- a/server/etcdserver/api/etcdhttp/base.go +++ b/server/etcdserver/api/etcdhttp/basic.go @@ -1,4 +1,4 @@ -// Copyright 2015 The etcd Authors +// Copyright 2022 The etcd Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,75 +15,17 @@ package etcdhttp import ( - "encoding/json" - "expvar" - "fmt" "net/http" - "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/server/v3/etcdserver" - "go.etcd.io/etcd/server/v3/etcdserver/api" - "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" + httptypes "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" "go.etcd.io/etcd/server/v3/etcdserver/api/v2error" "go.uber.org/zap" ) -const ( - varsPath = "/debug/vars" - versionPath = "/version" -) - -// HandleBasic adds handlers to a mux for serving JSON etcd client requests -// that do not access the v2 store. -func HandleBasic(lg *zap.Logger, mux *http.ServeMux, server etcdserver.ServerPeer) { - mux.HandleFunc(varsPath, serveVars) - mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion)) -} - -func versionHandler(c api.Cluster, fn func(http.ResponseWriter, *http.Request, string)) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - v := c.Version() - if v != nil { - fn(w, r, v.String()) - } else { - fn(w, r, "not_decided") - } - } -} - -func serveVersion(w http.ResponseWriter, r *http.Request, clusterV string) { - if !allowMethod(w, r, "GET") { - return - } - vs := version.Versions{ - Server: version.Version, - Cluster: clusterV, - } - - w.Header().Set("Content-Type", "application/json") - b, err := json.Marshal(&vs) - if err != nil { - panic(fmt.Sprintf("cannot marshal versions to json (%v)", err)) - } - w.Write(b) -} - -func serveVars(w http.ResponseWriter, r *http.Request) { - if !allowMethod(w, r, "GET") { - return - } - - w.Header().Set("Content-Type", "application/json; charset=utf-8") - fmt.Fprintf(w, "{\n") - first := true - expvar.Do(func(kv expvar.KeyValue) { - if !first { - fmt.Fprintf(w, ",\n") - } - first = false - fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value) - }) - fmt.Fprintf(w, "\n}\n") +func HandleBasic(lg *zap.Logger, mux *http.ServeMux, peer etcdserver.ServerPeer) { + HandleDebug(mux) + HandleVersion(mux, peer) } func allowMethod(w http.ResponseWriter, r *http.Request, m string) bool { @@ -95,10 +37,10 @@ func allowMethod(w http.ResponseWriter, r *http.Request, m string) bool { return false } -// WriteError logs and writes the given Error to the ResponseWriter +// writeError logs and writes the given Error to the ResponseWriter // If Error is an etcdErr, it is rendered to the ResponseWriter // Otherwise, it is assumed to be a StatusInternalServerError -func WriteError(lg *zap.Logger, w http.ResponseWriter, r *http.Request, err error) { +func writeError(lg *zap.Logger, w http.ResponseWriter, r *http.Request, err error) { if err == nil { return } diff --git a/server/etcdserver/api/etcdhttp/debug.go b/server/etcdserver/api/etcdhttp/debug.go new file mode 100644 index 000000000..502079e2a --- /dev/null +++ b/server/etcdserver/api/etcdhttp/debug.go @@ -0,0 +1,47 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdhttp + +import ( + "expvar" + "fmt" + "net/http" +) + +const ( + varsPath = "/debug/vars" +) + +func HandleDebug(mux *http.ServeMux) { + mux.HandleFunc(varsPath, serveVars) +} + +func serveVars(w http.ResponseWriter, r *http.Request) { + if !allowMethod(w, r, "GET") { + return + } + + w.Header().Set("Content-Type", "application/json; charset=utf-8") + fmt.Fprintf(w, "{\n") + first := true + expvar.Do(func(kv expvar.KeyValue) { + if !first { + fmt.Fprintf(w, ",\n") + } + first = false + fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value) + }) + fmt.Fprintf(w, "\n}\n") +} diff --git a/server/etcdserver/api/etcdhttp/peer.go b/server/etcdserver/api/etcdhttp/peer.go index badc98634..4470bf9e6 100644 --- a/server/etcdserver/api/etcdhttp/peer.go +++ b/server/etcdserver/api/etcdhttp/peer.go @@ -145,7 +145,7 @@ func (h *peerMemberPromoteHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ case etcdserver.ErrLearnerNotReady: http.Error(w, err.Error(), http.StatusPreconditionFailed) default: - WriteError(h.lg, w, r, err) + writeError(h.lg, w, r, err) } h.lg.Warn( "failed to promote a member", diff --git a/server/etcdserver/api/etcdhttp/version.go b/server/etcdserver/api/etcdhttp/version.go new file mode 100644 index 000000000..296861175 --- /dev/null +++ b/server/etcdserver/api/etcdhttp/version.go @@ -0,0 +1,61 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdhttp + +import ( + "encoding/json" + "fmt" + "net/http" + + "go.etcd.io/etcd/api/v3/version" + "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/etcdserver/api" +) + +const ( + versionPath = "/version" +) + +func HandleVersion(mux *http.ServeMux, server etcdserver.ServerPeer) { + mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion)) +} + +func versionHandler(c api.Cluster, fn func(http.ResponseWriter, *http.Request, string)) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + v := c.Version() + if v != nil { + fn(w, r, v.String()) + } else { + fn(w, r, "not_decided") + } + } +} + +func serveVersion(w http.ResponseWriter, r *http.Request, clusterV string) { + if !allowMethod(w, r, "GET") { + return + } + vs := version.Versions{ + Server: version.Version, + Cluster: clusterV, + } + + w.Header().Set("Content-Type", "application/json") + b, err := json.Marshal(&vs) + if err != nil { + panic(fmt.Sprintf("cannot marshal versions to json (%v)", err)) + } + w.Write(b) +} From 26f42e7a9e16c64bd86412f71572c11d56b9d45e Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Fri, 25 Feb 2022 14:54:57 +0100 Subject: [PATCH 9/9] server: Apply review comments and split basic handler --- server/embed/etcd.go | 3 ++- server/etcdserver/api/etcdhttp/health_test.go | 11 +++++++---- server/etcdserver/api/etcdhttp/{basic.go => utils.go} | 5 ----- tests/framework/integration/cluster.go | 3 ++- 4 files changed, 11 insertions(+), 11 deletions(-) rename server/etcdserver/api/etcdhttp/{basic.go => utils.go} (95%) diff --git a/server/embed/etcd.go b/server/embed/etcd.go index cc8fef220..663e082d3 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -701,7 +701,8 @@ func (e *Etcd) serveClients() (err error) { // Start a client server goroutine for each listen address mux := http.NewServeMux() - etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server) + etcdhttp.HandleDebug(mux) + etcdhttp.HandleVersion(mux, e.Server) etcdhttp.HandleMetrics(mux) etcdhttp.HandleHealth(e.cfg.logger, mux, e.Server) diff --git a/server/etcdserver/api/etcdhttp/health_test.go b/server/etcdserver/api/etcdhttp/health_test.go index 7cb30148b..f5d3e048a 100644 --- a/server/etcdserver/api/etcdhttp/health_test.go +++ b/server/etcdserver/api/etcdhttp/health_test.go @@ -115,18 +115,21 @@ func TestHealthHandler(t *testing.T) { expectHealth: "true", }, { + name: "Healthy even if authentication failed", healthCheckURL: "/health", apiError: auth.ErrUserEmpty, expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Healthy even if authorization failed", healthCheckURL: "/health", apiError: auth.ErrPermissionDenied, expectStatusCode: http.StatusOK, expectHealth: "true", }, { + name: "Unhealthy if api is not available", healthCheckURL: "/health", apiError: fmt.Errorf("Unexpected error"), expectStatusCode: http.StatusServiceUnavailable, @@ -134,7 +137,7 @@ func TestHealthHandler(t *testing.T) { }, } - for i, tt := range tests { + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { mux := http.NewServeMux() HandleHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{ @@ -147,14 +150,14 @@ func TestHealthHandler(t *testing.T) { res, err := ts.Client().Do(&http.Request{Method: http.MethodGet, URL: testutil.MustNewURL(t, ts.URL+tt.healthCheckURL)}) if err != nil { - t.Errorf("fail serve http request %s %v in test case #%d", tt.healthCheckURL, err, i+1) + t.Errorf("fail serve http request %s %v", tt.healthCheckURL, err) } if res == nil { - t.Errorf("got nil http response with http request %s in test case #%d", tt.healthCheckURL, i+1) + t.Errorf("got nil http response with http request %s", tt.healthCheckURL) return } if res.StatusCode != tt.expectStatusCode { - t.Errorf("want statusCode %d but got %d in test case #%d", tt.expectStatusCode, res.StatusCode, i+1) + t.Errorf("want statusCode %d but got %d", tt.expectStatusCode, res.StatusCode) } health, err := parseHealthOutput(res.Body) if err != nil { diff --git a/server/etcdserver/api/etcdhttp/basic.go b/server/etcdserver/api/etcdhttp/utils.go similarity index 95% rename from server/etcdserver/api/etcdhttp/basic.go rename to server/etcdserver/api/etcdhttp/utils.go index 3c2e38a3b..09957bfc1 100644 --- a/server/etcdserver/api/etcdhttp/basic.go +++ b/server/etcdserver/api/etcdhttp/utils.go @@ -23,11 +23,6 @@ import ( "go.uber.org/zap" ) -func HandleBasic(lg *zap.Logger, mux *http.ServeMux, peer etcdserver.ServerPeer) { - HandleDebug(mux) - HandleVersion(mux, peer) -} - func allowMethod(w http.ResponseWriter, r *http.Request, m string) bool { if m == r.Method { return true diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 3dcb4c914..125e228bc 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -992,7 +992,8 @@ func (m *Member) Launch() error { } for _, ln := range m.ClientListeners { handler := http.NewServeMux() - etcdhttp.HandleBasic(m.Logger, handler, m.Server) + etcdhttp.HandleDebug(handler) + etcdhttp.HandleVersion(handler, m.Server) etcdhttp.HandleMetrics(handler) etcdhttp.HandleHealth(m.Logger, handler, m.Server) hs := &httptest.Server{