mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #12917 from chaochn47/2021-05-03-backport-#12880
Backport-3.4 exclude alarms from health check conditionally
This commit is contained in:
commit
6bbc85827b
@ -36,7 +36,7 @@ const (
|
||||
// HandleMetricsHealth registers metrics and health handlers.
|
||||
func HandleMetricsHealth(mux *http.ServeMux, srv etcdserver.ServerV2) {
|
||||
mux.Handle(PathMetrics, promhttp.Handler())
|
||||
mux.Handle(PathHealth, NewHealthHandler(func() Health { return checkHealth(srv) }))
|
||||
mux.Handle(PathHealth, NewHealthHandler(func(excludedAlarms AlarmSet) Health { return checkHealth(srv, excludedAlarms) }))
|
||||
}
|
||||
|
||||
// HandlePrometheus registers prometheus handler on '/metrics'.
|
||||
@ -45,7 +45,7 @@ func HandlePrometheus(mux *http.ServeMux) {
|
||||
}
|
||||
|
||||
// NewHealthHandler handles '/health' requests.
|
||||
func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
|
||||
func NewHealthHandler(hfunc func(excludedAlarms AlarmSet) Health) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.Header().Set("Allow", http.MethodGet)
|
||||
@ -53,7 +53,8 @@ func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
|
||||
plog.Warningf("/health error (status code %d)", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
h := hfunc()
|
||||
excludedAlarms := getExcludedAlarms(r)
|
||||
h := hfunc(excludedAlarms)
|
||||
d, _ := json.Marshal(h)
|
||||
if h.Health != "true" {
|
||||
http.Error(w, string(d), http.StatusServiceUnavailable)
|
||||
@ -90,19 +91,46 @@ type Health struct {
|
||||
Health string `json:"health"`
|
||||
}
|
||||
|
||||
// AlarmSet is a set of alarm names.
type AlarmSet map[string]struct{}

// getExcludedAlarms collects the values of every "exclude" query parameter of
// r into an AlarmSet. Empty values (as in "?exclude=") are skipped. The
// returned set is never nil, even when the parameter is absent.
func getExcludedAlarms(r *http.Request) AlarmSet {
	// Ranging over a missing (nil) parameter is a no-op, so no presence check
	// is needed.
	values := r.URL.Query()["exclude"]
	alarms := make(AlarmSet, len(values))
	for _, name := range values {
		// Test the individual value, not the slice being iterated: the slice
		// is never empty inside the loop, so checking it would skip nothing.
		if name == "" {
			continue
		}
		alarms[name] = struct{}{}
	}
	return alarms
}
|
||||
|
||||
// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
|
||||
|
||||
func checkHealth(srv etcdserver.ServerV2) Health {
|
||||
func checkHealth(srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health {
|
||||
h := Health{Health: "true"}
|
||||
|
||||
as := srv.Alarms()
|
||||
if len(as) > 0 {
|
||||
h.Health = "false"
|
||||
for _, v := range as {
|
||||
plog.Warningf("/health error due to an alarm %s", v.String())
|
||||
alarmName := v.Alarm.String()
|
||||
if _, found := excludedAlarms[alarmName]; found {
|
||||
plog.Debugf("/health excluded alarm %s", alarmName)
|
||||
delete(excludedAlarms, alarmName)
|
||||
continue
|
||||
}
|
||||
h.Health = "false"
|
||||
plog.Warningf("/health error due to %s", v.String())
|
||||
return h
|
||||
}
|
||||
}
|
||||
|
||||
if len(excludedAlarms) > 0 {
|
||||
plog.Warningf("fail exclude alarms from health check, exclude alarms %+v", excludedAlarms)
|
||||
}
|
||||
|
||||
if h.Health == "true" {
|
||||
if uint64(srv.Leader()) == raft.None {
|
||||
h.Health = "false"
|
||||
|
151
etcdserver/api/etcdhttp/metrics_test.go
Normal file
151
etcdserver/api/etcdhttp/metrics_test.go
Normal file
@ -0,0 +1,151 @@
|
||||
// Copyright 2021 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package etcdhttp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"go.etcd.io/etcd/etcdserver"
|
||||
stats "go.etcd.io/etcd/etcdserver/api/v2stats"
|
||||
pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
|
||||
"go.etcd.io/etcd/pkg/testutil"
|
||||
"go.etcd.io/etcd/pkg/types"
|
||||
"go.etcd.io/etcd/raft"
|
||||
)
|
||||
|
||||
// fakeStats is a stats stub for tests; each of its reports is nil.
type fakeStats struct{}

// SelfStats returns nil.
func (s *fakeStats) SelfStats() []byte {
	return nil
}

// LeaderStats returns nil.
func (s *fakeStats) LeaderStats() []byte {
	return nil
}

// StoreStats returns nil.
func (s *fakeStats) StoreStats() []byte {
	return nil
}
|
||||
|
||||
type fakeServerV2 struct {
|
||||
fakeServer
|
||||
stats.Stats
|
||||
health string
|
||||
}
|
||||
|
||||
func (s *fakeServerV2) Leader() types.ID {
|
||||
if s.health == "true" {
|
||||
return 1
|
||||
}
|
||||
return types.ID(raft.None)
|
||||
}
|
||||
func (s *fakeServerV2) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) {
|
||||
if s.health == "true" {
|
||||
return etcdserver.Response{}, nil
|
||||
}
|
||||
return etcdserver.Response{}, fmt.Errorf("fail health check")
|
||||
}
|
||||
func (s *fakeServerV2) ClientCertAuthEnabled() bool { return false }
|
||||
|
||||
func TestHealthHandler(t *testing.T) {
|
||||
// define the input and expected output
|
||||
// input: alarms, and healthCheckURL
|
||||
tests := []struct {
|
||||
alarms []*pb.AlarmMember
|
||||
healthCheckURL string
|
||||
statusCode int
|
||||
health string
|
||||
}{
|
||||
{
|
||||
[]*pb.AlarmMember{},
|
||||
"/health",
|
||||
http.StatusOK,
|
||||
"true",
|
||||
},
|
||||
{
|
||||
[]*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
|
||||
"/health",
|
||||
http.StatusServiceUnavailable,
|
||||
"false",
|
||||
},
|
||||
{
|
||||
[]*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
|
||||
"/health?exclude=NOSPACE",
|
||||
http.StatusOK,
|
||||
"true",
|
||||
},
|
||||
{
|
||||
[]*pb.AlarmMember{},
|
||||
"/health?exclude=NOSPACE",
|
||||
http.StatusOK,
|
||||
"true",
|
||||
},
|
||||
{
|
||||
[]*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
|
||||
"/health?exclude=NOSPACE",
|
||||
http.StatusServiceUnavailable,
|
||||
"false",
|
||||
},
|
||||
{
|
||||
[]*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
|
||||
"/health?exclude=NOSPACE&exclude=CORRUPT",
|
||||
http.StatusOK,
|
||||
"true",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
func() {
|
||||
mux := http.NewServeMux()
|
||||
HandleMetricsHealth(mux, &fakeServerV2{
|
||||
fakeServer: fakeServer{alarms: tt.alarms},
|
||||
Stats: &fakeStats{},
|
||||
health: tt.health,
|
||||
})
|
||||
ts := httptest.NewServer(mux)
|
||||
defer ts.Close()
|
||||
|
||||
res, err := ts.Client().Do(&http.Request{Method: http.MethodGet, URL: testutil.MustNewURL(t, ts.URL+tt.healthCheckURL)})
|
||||
if err != nil {
|
||||
t.Errorf("fail serve http request %s %v in test case #%d", tt.healthCheckURL, err, i+1)
|
||||
}
|
||||
if res == nil {
|
||||
t.Errorf("got nil http response with http request %s in test case #%d", tt.healthCheckURL, i+1)
|
||||
return
|
||||
}
|
||||
if res.StatusCode != tt.statusCode {
|
||||
t.Errorf("want statusCode %d but got %d in test case #%d", tt.statusCode, res.StatusCode, i+1)
|
||||
}
|
||||
health, err := parseHealthOutput(res.Body)
|
||||
if err != nil {
|
||||
t.Errorf("fail parse health check output %v", err)
|
||||
}
|
||||
if health.Health != tt.health {
|
||||
t.Errorf("want health %s but got %s", tt.health, health.Health)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func parseHealthOutput(body io.Reader) (Health, error) {
|
||||
obj := Health{}
|
||||
d, derr := ioutil.ReadAll(body)
|
||||
if derr != nil {
|
||||
return obj, derr
|
||||
}
|
||||
if err := json.Unmarshal(d, &obj); err != nil {
|
||||
return obj, err
|
||||
}
|
||||
return obj, nil
|
||||
}
|
@ -58,6 +58,7 @@ func (c *fakeCluster) Version() *semver.Version { return nil }
|
||||
|
||||
type fakeServer struct {
|
||||
cluster api.Cluster
|
||||
alarms []*pb.AlarmMember
|
||||
}
|
||||
|
||||
func (s *fakeServer) AddMember(ctx context.Context, memb membership.Member) ([]*membership.Member, error) {
|
||||
@ -74,7 +75,7 @@ func (s *fakeServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
|
||||
}
|
||||
func (s *fakeServer) ClusterVersion() *semver.Version { return nil }
|
||||
func (s *fakeServer) Cluster() api.Cluster { return s.cluster }
|
||||
func (s *fakeServer) Alarms() []*pb.AlarmMember { return nil }
|
||||
func (s *fakeServer) Alarms() []*pb.AlarmMember { return s.alarms }
|
||||
|
||||
var fakeRaftHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte("test data"))
|
||||
|
@ -26,7 +26,7 @@ import (
|
||||
|
||||
// HandleHealth registers health handler on '/health'.
|
||||
func HandleHealth(mux *http.ServeMux, c *clientv3.Client) {
|
||||
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(func() etcdhttp.Health { return checkHealth(c) }))
|
||||
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(func(excludedAlarms etcdhttp.AlarmSet) etcdhttp.Health { return checkHealth(c) }))
|
||||
}
|
||||
|
||||
func checkHealth(c *clientv3.Client) etcdhttp.Health {
|
||||
|
Loading…
x
Reference in New Issue
Block a user