Merge pull request #8312 from gyuho/health-lists

api/etcdhttp: serve error information in '/health', marshal health in JSON
This commit is contained in:
Gyu-Ho Lee 2017-07-27 15:46:39 -07:00 committed by GitHub
commit b36463efe5
5 changed files with 20 additions and 10 deletions

View File

@ -45,12 +45,12 @@ It is important to monitor your production etcd cluster for healthy information
#### Health Monitoring #### Health Monitoring
At the lowest level, etcd exposes health information via HTTP at `/health` in JSON format. If it returns `{"health": "true"}`, then the cluster is healthy. Please note the `/health` endpoint is still an experimental one as of etcd 2.2. At the lowest level, etcd exposes health information via HTTP at `/health` in JSON format. If it returns `{"health":true}`, then the cluster is healthy.
``` ```
$ curl -L http://127.0.0.1:2379/health $ curl -L http://127.0.0.1:2379/health
{"health": "true"} {"health":true}
``` ```
You can also use etcdctl to check the cluster-wide health information. It will contact all the members of the cluster and collect the health information for you. You can also use etcdctl to check the cluster-wide health information. It will contact all the members of the cluster and collect the health information for you.

View File

@ -29,5 +29,5 @@ curl http://10.0.0.10:2379/health
``` ```
```json ```json
{"health": "true"} {"health":true}
``` ```

View File

@ -53,7 +53,7 @@ func alarmTest(cx ctlCtx) {
} }
// '/health' handler should return 'false' // '/health' handler should return 'false'
if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health": "false"}`}); err != nil { if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health":false,"errors":["NOSPACE"]}`}); err != nil {
cx.t.Fatalf("failed get with curl (%v)", err) cx.t.Fatalf("failed get with curl (%v)", err)
} }

View File

@ -41,7 +41,7 @@ func metricsTest(cx ctlCtx) {
if err := cURLGet(cx.epc, cURLReq{endpoint: "/metrics", expected: `etcd_debugging_mvcc_keys_total 1`, metricsURLScheme: cx.cfg.metricsURLScheme}); err != nil { if err := cURLGet(cx.epc, cURLReq{endpoint: "/metrics", expected: `etcd_debugging_mvcc_keys_total 1`, metricsURLScheme: cx.cfg.metricsURLScheme}); err != nil {
cx.t.Fatalf("failed get with curl (%v)", err) cx.t.Fatalf("failed get with curl (%v)", err)
} }
if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health": "true"}`, metricsURLScheme: cx.cfg.metricsURLScheme}); err != nil { if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health":true}`, metricsURLScheme: cx.cfg.metricsURLScheme}); err != nil {
cx.t.Fatalf("failed get with curl (%v)", err) cx.t.Fatalf("failed get with curl (%v)", err)
} }
} }

View File

@ -16,7 +16,7 @@ package etcdhttp
import ( import (
"context" "context"
"fmt" "encoding/json"
"net/http" "net/http"
"time" "time"
@ -57,7 +57,7 @@ func newHealthHandler(srv *etcdserver.EtcdServer) http.HandlerFunc {
return return
} }
h := checkHealth(srv) h := checkHealth(srv)
d := []byte(fmt.Sprintf(`{"health": "%v"}`, h.Health)) d, _ := json.Marshal(h)
if !h.Health { if !h.Health {
http.Error(w, string(d), http.StatusServiceUnavailable) http.Error(w, string(d), http.StatusServiceUnavailable)
return return
@ -67,24 +67,34 @@ func newHealthHandler(srv *etcdserver.EtcdServer) http.HandlerFunc {
} }
} }
// TODO: remove manual parsing in etcdctl cluster-health
type health struct { type health struct {
Health bool `json:"health"` Health bool `json:"health"`
Errors []string `json:"errors,omitempty"`
} }
func checkHealth(srv *etcdserver.EtcdServer) health { func checkHealth(srv *etcdserver.EtcdServer) health {
h := health{Health: false} h := health{Health: false}
if len(srv.Alarms()) > 0 {
// TODO: provide alarm lists as := srv.Alarms()
if len(as) > 0 {
for _, v := range as {
h.Errors = append(h.Errors, v.Alarm.String())
}
return h return h
} }
if uint64(srv.Leader()) == raft.None { if uint64(srv.Leader()) == raft.None {
h.Errors = append(h.Errors, etcdserver.ErrNoLeader.Error())
return h return h
} }
ctx, cancel := context.WithTimeout(context.Background(), time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"})
cancel() cancel()
if err != nil {
h.Errors = append(h.Errors, err.Error())
}
h.Health = err == nil h.Health = err == nil
return h return h