mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #12195 from tangcong/optimize-healthcheck
*: check health by using v3 range request and its corresponding timeout
This commit is contained in:
commit
fe36be2251
@ -134,6 +134,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
|
|||||||
- Improve [mvcc.watchResponse channel Memory Usage](https://github.com/etcd-io/etcd/pull/11987).
|
- Improve [mvcc.watchResponse channel Memory Usage](https://github.com/etcd-io/etcd/pull/11987).
|
||||||
- Log [expensive request info in UnaryInterceptor](https://github.com/etcd-io/etcd/pull/12086).
|
- Log [expensive request info in UnaryInterceptor](https://github.com/etcd-io/etcd/pull/12086).
|
||||||
- [Fix invalid Go type in etcdserverpb](https://github.com/etcd-io/etcd/pull/12000).
|
- [Fix invalid Go type in etcdserverpb](https://github.com/etcd-io/etcd/pull/12000).
|
||||||
|
- [Improve healthcheck by using v3 range request and its corresponding timeout](https://github.com/etcd-io/etcd/pull/12195).
|
||||||
|
|
||||||
### Package `embed`
|
### Package `embed`
|
||||||
|
|
||||||
|
@ -632,6 +632,7 @@ func (e *Etcd) serveClients() (err error) {
|
|||||||
} else {
|
} else {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server)
|
etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server)
|
||||||
|
etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, mux, e.Server)
|
||||||
h = mux
|
h = mux
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -666,7 +667,7 @@ func (e *Etcd) serveMetrics() (err error) {
|
|||||||
|
|
||||||
if len(e.cfg.ListenMetricsUrls) > 0 {
|
if len(e.cfg.ListenMetricsUrls) > 0 {
|
||||||
metricsMux := http.NewServeMux()
|
metricsMux := http.NewServeMux()
|
||||||
etcdhttp.HandleMetricsHealth(e.cfg.logger, metricsMux, e.Server)
|
etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, metricsMux, e.Server)
|
||||||
|
|
||||||
for _, murl := range e.cfg.ListenMetricsUrls {
|
for _, murl := range e.cfg.ListenMetricsUrls {
|
||||||
tlsInfo := &e.cfg.ClientTLSInfo
|
tlsInfo := &e.cfg.ClientTLSInfo
|
||||||
|
@ -37,12 +37,7 @@ const (
|
|||||||
// HandleBasic adds handlers to a mux for serving JSON etcd client requests
|
// HandleBasic adds handlers to a mux for serving JSON etcd client requests
|
||||||
// that do not access the v2 store.
|
// that do not access the v2 store.
|
||||||
func HandleBasic(lg *zap.Logger, mux *http.ServeMux, server etcdserver.ServerPeer) {
|
func HandleBasic(lg *zap.Logger, mux *http.ServeMux, server etcdserver.ServerPeer) {
|
||||||
if lg == nil {
|
|
||||||
lg = zap.NewNop()
|
|
||||||
}
|
|
||||||
mux.HandleFunc(varsPath, serveVars)
|
mux.HandleFunc(varsPath, serveVars)
|
||||||
|
|
||||||
HandleMetricsHealth(lg, mux, server)
|
|
||||||
mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion))
|
mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ package etcdhttp
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -38,7 +39,14 @@ const (
|
|||||||
// HandleMetricsHealth registers metrics and health handlers.
|
// HandleMetricsHealth registers metrics and health handlers.
|
||||||
func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) {
|
func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) {
|
||||||
mux.Handle(PathMetrics, promhttp.Handler())
|
mux.Handle(PathMetrics, promhttp.Handler())
|
||||||
mux.Handle(PathHealth, NewHealthHandler(lg, func() Health { return checkHealth(lg, srv) }))
|
mux.Handle(PathHealth, NewHealthHandler(lg, func() Health { return checkV2Health(lg, srv) }))
|
||||||
|
}
|
||||||
|
|
||||||
|
// HandleMetricsHealthForV3 registers metrics and health handlers. It checks health by using a v3 range request
|
||||||
|
// and its corresponding timeout.
|
||||||
|
func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) {
|
||||||
|
mux.Handle(PathMetrics, promhttp.Handler())
|
||||||
|
mux.Handle(PathHealth, NewHealthHandler(lg, func() Health { return checkV3Health(lg, srv) }))
|
||||||
}
|
}
|
||||||
|
|
||||||
// HandlePrometheus registers prometheus handler on '/metrics'.
|
// HandlePrometheus registers prometheus handler on '/metrics'.
|
||||||
@ -56,6 +64,13 @@ func NewHealthHandler(lg *zap.Logger, hfunc func() Health) http.HandlerFunc {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
h := hfunc()
|
h := hfunc()
|
||||||
|
defer func() {
|
||||||
|
if h.Health == "true" {
|
||||||
|
healthSuccess.Inc()
|
||||||
|
} else {
|
||||||
|
healthFailed.Inc()
|
||||||
|
}
|
||||||
|
}()
|
||||||
d, _ := json.Marshal(h)
|
d, _ := json.Marshal(h)
|
||||||
if h.Health != "true" {
|
if h.Health != "true" {
|
||||||
http.Error(w, string(d), http.StatusServiceUnavailable)
|
http.Error(w, string(d), http.StatusServiceUnavailable)
|
||||||
@ -97,17 +112,9 @@ type Health struct {
|
|||||||
|
|
||||||
// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
|
// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
|
||||||
|
|
||||||
func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) {
|
func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) Health {
|
||||||
|
h := Health{}
|
||||||
h.Health = "true"
|
h.Health = "true"
|
||||||
|
|
||||||
defer func() {
|
|
||||||
if h.Health == "true" {
|
|
||||||
healthSuccess.Inc()
|
|
||||||
} else {
|
|
||||||
healthFailed.Inc()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
as := srv.Alarms()
|
as := srv.Alarms()
|
||||||
if len(as) > 0 {
|
if len(as) > 0 {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
@ -122,25 +129,48 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) {
|
|||||||
}
|
}
|
||||||
lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String()))
|
lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String()))
|
||||||
}
|
}
|
||||||
return
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
if uint64(srv.Leader()) == raft.None {
|
if uint64(srv.Leader()) == raft.None {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
h.Reason = "RAFT NO LEADER"
|
h.Reason = "RAFT NO LEADER"
|
||||||
lg.Warn("serving /health false; no leader")
|
lg.Warn("serving /health false; no leader")
|
||||||
return
|
return h
|
||||||
|
}
|
||||||
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) {
|
||||||
|
if h = checkHealth(lg, srv); h.Health != "true" {
|
||||||
|
return
|
||||||
|
}
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||||
_, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"})
|
_, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"})
|
||||||
cancel()
|
cancel()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
h.Reason = "QGET ERROR"
|
h.Reason = fmt.Sprintf("QGET ERROR:%s", err)
|
||||||
lg.Warn("serving /health false; QGET fails", zap.Error(err))
|
lg.Warn("serving /health false; QGET fails", zap.Error(err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
lg.Info("serving /health true")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer) (h Health) {
|
||||||
|
if h = checkHealth(lg, srv); h.Health != "true" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout())
|
||||||
|
_, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1})
|
||||||
|
cancel()
|
||||||
|
if err != nil {
|
||||||
|
h.Health = "false"
|
||||||
|
h.Reason = fmt.Sprintf("RANGE ERROR:%s", err)
|
||||||
|
lg.Warn("serving /health false; Range fails", zap.Error(err))
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
lg.Info("serving /health true")
|
lg.Info("serving /health true")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -58,6 +58,7 @@ func NewClientHandler(lg *zap.Logger, server etcdserver.ServerPeer, timeout time
|
|||||||
}
|
}
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
etcdhttp.HandleBasic(lg, mux, server)
|
etcdhttp.HandleBasic(lg, mux, server)
|
||||||
|
etcdhttp.HandleMetricsHealth(lg, mux, server)
|
||||||
handleV2(lg, mux, server, timeout)
|
handleV2(lg, mux, server, timeout)
|
||||||
return requestLogger(lg, mux)
|
return requestLogger(lg, mux)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user