mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
*: log server-side /health checks
This makes it easier to root-cause /health check failures. For example, we use a load balancer to health-check each etcd instance, and when one etcd node gets terminated, it is hard to tell whether the etcd "server" was really failing or the client (or load balancer) failed to reach the etcd cluster, which also counts as a failure in the load balancer health check. Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
This commit is contained in:
parent
1c16c242db
commit
92f180c574
@ -629,7 +629,7 @@ func (e *Etcd) serveClients() (err error) {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
etcdhttp.HandleBasic(mux, e.Server)
|
etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server)
|
||||||
h = mux
|
h = mux
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -664,7 +664,7 @@ func (e *Etcd) serveMetrics() (err error) {
|
|||||||
|
|
||||||
if len(e.cfg.ListenMetricsUrls) > 0 {
|
if len(e.cfg.ListenMetricsUrls) > 0 {
|
||||||
metricsMux := http.NewServeMux()
|
metricsMux := http.NewServeMux()
|
||||||
etcdhttp.HandleMetricsHealth(metricsMux, e.Server)
|
etcdhttp.HandleMetricsHealth(e.cfg.logger, metricsMux, e.Server)
|
||||||
|
|
||||||
for _, murl := range e.cfg.ListenMetricsUrls {
|
for _, murl := range e.cfg.ListenMetricsUrls {
|
||||||
tlsInfo := &e.cfg.ClientTLSInfo
|
tlsInfo := &e.cfg.ClientTLSInfo
|
||||||
|
@ -204,7 +204,7 @@ func startGRPCProxy(cmd *cobra.Command, args []string) {
|
|||||||
go func() {
|
go func() {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
grpcproxy.HandleMetrics(mux, httpClient, client.Endpoints())
|
grpcproxy.HandleMetrics(mux, httpClient, client.Endpoints())
|
||||||
grpcproxy.HandleHealth(mux, client)
|
grpcproxy.HandleHealth(lg, mux, client)
|
||||||
lg.Info("gRPC proxy server metrics URL serving")
|
lg.Info("gRPC proxy server metrics URL serving")
|
||||||
herr := http.Serve(mhttpl, mux)
|
herr := http.Serve(mhttpl, mux)
|
||||||
if herr != nil {
|
if herr != nil {
|
||||||
@ -381,7 +381,7 @@ func mustHTTPListener(lg *zap.Logger, m cmux.CMux, tlsinfo *transport.TLSInfo, c
|
|||||||
httpmux := http.NewServeMux()
|
httpmux := http.NewServeMux()
|
||||||
httpmux.HandleFunc("/", http.NotFound)
|
httpmux.HandleFunc("/", http.NotFound)
|
||||||
grpcproxy.HandleMetrics(httpmux, httpClient, c.Endpoints())
|
grpcproxy.HandleMetrics(httpmux, httpClient, c.Endpoints())
|
||||||
grpcproxy.HandleHealth(httpmux, c)
|
grpcproxy.HandleHealth(lg, httpmux, c)
|
||||||
if grpcProxyEnablePprof {
|
if grpcProxyEnablePprof {
|
||||||
for p, h := range debugutil.PProfHandlers() {
|
for p, h := range debugutil.PProfHandlers() {
|
||||||
httpmux.Handle(p, h)
|
httpmux.Handle(p, h)
|
||||||
|
@ -25,7 +25,6 @@ import (
|
|||||||
"go.etcd.io/etcd/etcdserver/api/v2error"
|
"go.etcd.io/etcd/etcdserver/api/v2error"
|
||||||
"go.etcd.io/etcd/etcdserver/api/v2http/httptypes"
|
"go.etcd.io/etcd/etcdserver/api/v2http/httptypes"
|
||||||
"go.etcd.io/etcd/version"
|
"go.etcd.io/etcd/version"
|
||||||
|
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -37,10 +36,13 @@ const (
|
|||||||
|
|
||||||
// HandleBasic adds handlers to a mux for serving JSON etcd client requests
|
// HandleBasic adds handlers to a mux for serving JSON etcd client requests
|
||||||
// that do not access the v2 store.
|
// that do not access the v2 store.
|
||||||
func HandleBasic(mux *http.ServeMux, server etcdserver.ServerPeer) {
|
func HandleBasic(lg *zap.Logger, mux *http.ServeMux, server etcdserver.ServerPeer) {
|
||||||
|
if lg == nil {
|
||||||
|
lg = zap.NewNop()
|
||||||
|
}
|
||||||
mux.HandleFunc(varsPath, serveVars)
|
mux.HandleFunc(varsPath, serveVars)
|
||||||
|
|
||||||
HandleMetricsHealth(mux, server)
|
HandleMetricsHealth(lg, mux, server)
|
||||||
mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion))
|
mux.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,12 +20,12 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"go.etcd.io/etcd/etcdserver"
|
"go.etcd.io/etcd/etcdserver"
|
||||||
"go.etcd.io/etcd/etcdserver/etcdserverpb"
|
"go.etcd.io/etcd/etcdserver/etcdserverpb"
|
||||||
"go.etcd.io/etcd/raft"
|
"go.etcd.io/etcd/raft"
|
||||||
|
"go.uber.org/zap"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -34,9 +34,9 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// HandleMetricsHealth registers metrics and health handlers.
|
// HandleMetricsHealth registers metrics and health handlers.
|
||||||
func HandleMetricsHealth(mux *http.ServeMux, srv etcdserver.ServerV2) {
|
func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) {
|
||||||
mux.Handle(PathMetrics, promhttp.Handler())
|
mux.Handle(PathMetrics, promhttp.Handler())
|
||||||
mux.Handle(PathHealth, NewHealthHandler(func() Health { return checkHealth(srv) }))
|
mux.Handle(PathHealth, NewHealthHandler(lg, func() Health { return checkHealth(lg, srv) }))
|
||||||
}
|
}
|
||||||
|
|
||||||
// HandlePrometheus registers prometheus handler on '/metrics'.
|
// HandlePrometheus registers prometheus handler on '/metrics'.
|
||||||
@ -45,21 +45,24 @@ func HandlePrometheus(mux *http.ServeMux) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewHealthHandler handles '/health' requests.
|
// NewHealthHandler handles '/health' requests.
|
||||||
func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
|
func NewHealthHandler(lg *zap.Logger, hfunc func() Health) http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
if r.Method != http.MethodGet {
|
if r.Method != http.MethodGet {
|
||||||
w.Header().Set("Allow", http.MethodGet)
|
w.Header().Set("Allow", http.MethodGet)
|
||||||
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
|
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
|
||||||
|
lg.Warn("/health error", zap.Int("status-code", http.StatusMethodNotAllowed))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
h := hfunc()
|
h := hfunc()
|
||||||
d, _ := json.Marshal(h)
|
d, _ := json.Marshal(h)
|
||||||
if h.Health != "true" {
|
if h.Health != "true" {
|
||||||
http.Error(w, string(d), http.StatusServiceUnavailable)
|
http.Error(w, string(d), http.StatusServiceUnavailable)
|
||||||
|
lg.Warn("/health error", zap.String("output", string(d)), zap.Int("status-code", http.StatusServiceUnavailable))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
w.Write(d)
|
w.Write(d)
|
||||||
|
lg.Info("/health OK", zap.Int("status-code", http.StatusOK))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,7 +94,7 @@ type Health struct {
|
|||||||
|
|
||||||
// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
|
// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
|
||||||
|
|
||||||
func checkHealth(srv etcdserver.ServerV2) (h Health) {
|
func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) {
|
||||||
h.Health = "true"
|
h.Health = "true"
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -105,11 +108,15 @@ func checkHealth(srv etcdserver.ServerV2) (h Health) {
|
|||||||
as := srv.Alarms()
|
as := srv.Alarms()
|
||||||
if len(as) > 0 {
|
if len(as) > 0 {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
|
for _, v := range as {
|
||||||
|
lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String()))
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if uint64(srv.Leader()) == raft.None {
|
if uint64(srv.Leader()) == raft.None {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
|
lg.Warn("serving /health false; no leader")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,6 +125,9 @@ func checkHealth(srv etcdserver.ServerV2) (h Health) {
|
|||||||
cancel()
|
cancel()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.Health = "false"
|
h.Health = "false"
|
||||||
|
lg.Warn("serving /health false; QGET fails", zap.Error(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lg.Info("serving /health true")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -57,7 +57,7 @@ func NewClientHandler(lg *zap.Logger, server etcdserver.ServerPeer, timeout time
|
|||||||
lg = zap.NewNop()
|
lg = zap.NewNop()
|
||||||
}
|
}
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
etcdhttp.HandleBasic(mux, server)
|
etcdhttp.HandleBasic(lg, mux, server)
|
||||||
handleV2(lg, mux, server, timeout)
|
handleV2(lg, mux, server, timeout)
|
||||||
return requestLogger(lg, mux)
|
return requestLogger(lg, mux)
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,15 @@ import (
|
|||||||
"go.etcd.io/etcd/clientv3"
|
"go.etcd.io/etcd/clientv3"
|
||||||
"go.etcd.io/etcd/etcdserver/api/etcdhttp"
|
"go.etcd.io/etcd/etcdserver/api/etcdhttp"
|
||||||
"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
|
"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||||
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
// HandleHealth registers health handler on '/health'.
|
// HandleHealth registers health handler on '/health'.
|
||||||
func HandleHealth(mux *http.ServeMux, c *clientv3.Client) {
|
func HandleHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) {
|
||||||
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(func() etcdhttp.Health { return checkHealth(c) }))
|
if lg == nil {
|
||||||
|
lg = zap.NewNop()
|
||||||
|
}
|
||||||
|
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func() etcdhttp.Health { return checkHealth(c) }))
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkHealth(c *clientv3.Client) etcdhttp.Health {
|
func checkHealth(c *clientv3.Client) etcdhttp.Health {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user