From 4acaa5a2a0f3d714bebd406f1dc40c3ea7d19de9 Mon Sep 17 00:00:00 2001 From: tangcong Date: Sat, 6 Jun 2020 14:24:41 +0800 Subject: [PATCH 1/3] etcdserver/api/etcdhttp: add reason field for /health response --- etcdserver/api/etcdhttp/metrics.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go index e6365a8c9..b34a26dfe 100644 --- a/etcdserver/api/etcdhttp/metrics.go +++ b/etcdserver/api/etcdhttp/metrics.go @@ -90,6 +90,7 @@ func init() { // TODO: remove manual parsing in etcdctl cluster-health type Health struct { Health string `json:"health"` + Reason string `json:"reason"` } // TODO: server NOSPACE, etcdserver.ErrNoLeader in health API @@ -109,6 +110,14 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) { if len(as) > 0 { h.Health = "false" for _, v := range as { + switch v.Alarm { + case etcdserverpb.AlarmType_NOSPACE: + h.Reason = "ALARM NOSPACE" + case etcdserverpb.AlarmType_CORRUPT: + h.Reason = "ALARM CORRUPT" + default: + h.Reason = "ALARM UNKNOWN" + } lg.Warn("serving /health false due to an alarm", zap.String("alarm", v.String())) } return @@ -116,6 +125,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) { if uint64(srv.Leader()) == raft.None { h.Health = "false" + h.Reason = "RAFT NO LEADER" lg.Warn("serving /health false; no leader") return } @@ -125,6 +135,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2) (h Health) { cancel() if err != nil { h.Health = "false" + h.Reason = "QGET ERROR" lg.Warn("serving /health false; QGET fails", zap.Error(err)) } From f77b21ce055227c00a5727d1de7eda35fef8f03b Mon Sep 17 00:00:00 2001 From: tangcong Date: Sat, 6 Jun 2020 14:35:30 +0800 Subject: [PATCH 2/3] CHANGELOG: update for 11983 --- CHANGELOG-3.5.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index f3062e0da..6fe21c4bd 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -122,6 +122,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - See https://github.com/etcd-io/etcd/issues/11918. - Improve logging around snapshot send and receive. - [Push down RangeOptions.limit argv into index tree to reduce memory overhead](https://github.com/etcd-io/etcd/pull/11990). +- Add [reason field for /health response](https://github.com/etcd-io/etcd/pull/11983). ### Package `embed` From 330424142c7fb86876004debac396a5d23b01708 Mon Sep 17 00:00:00 2001 From: tangcong Date: Sat, 6 Jun 2020 15:35:53 +0800 Subject: [PATCH 3/3] tests/e2e: fix failed /health test case --- tests/e2e/ctl_v3_alarm_test.go | 2 +- tests/e2e/metrics_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/ctl_v3_alarm_test.go b/tests/e2e/ctl_v3_alarm_test.go index 4d3df2a93..a6ef5e398 100644 --- a/tests/e2e/ctl_v3_alarm_test.go +++ b/tests/e2e/ctl_v3_alarm_test.go @@ -53,7 +53,7 @@ func alarmTest(cx ctlCtx) { } // '/health' handler should return 'false' - if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health":"false"}`}); err != nil { + if err := cURLGet(cx.epc, cURLReq{endpoint: "/health", expected: `{"health":"false","reason":"ALARM NOSPACE"}`}); err != nil { cx.t.Fatalf("failed get with curl (%v)", err) } diff --git a/tests/e2e/metrics_test.go b/tests/e2e/metrics_test.go index 1ce401d93..e20b0f88d 100644 --- a/tests/e2e/metrics_test.go +++ b/tests/e2e/metrics_test.go @@ -49,7 +49,7 @@ func metricsTest(cx ctlCtx) { {"/metrics", fmt.Sprintf("etcd_mvcc_delete_total 3")}, {"/metrics", fmt.Sprintf(`etcd_server_version{server_version="%s"} 1`, version.Version)}, {"/metrics", fmt.Sprintf(`etcd_cluster_version{cluster_version="%s"} 1`, version.Cluster(version.Version))}, - {"/health", `{"health":"true"}`}, + {"/health", `{"health":"true","reason":""}`}, } { i++ if err := ctlV3Put(cx, fmt.Sprintf("%d", i), "v", ""); err != nil {