From f7f00b0af6fde87f6f542469fdbd81584be53af0 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Thu, 30 Jul 2015 10:01:47 +0800 Subject: [PATCH] etcdctl: cluster-health supports forever flag cluster-health command supports checking the cluster health forever. --- etcdctl/command/cluster_health.go | 117 +++++++++++++++++++----------- 1 file changed, 74 insertions(+), 43 deletions(-) diff --git a/etcdctl/command/cluster_health.go b/etcdctl/command/cluster_health.go index ffe940c13..cd9f2418b 100644 --- a/etcdctl/command/cluster_health.go +++ b/etcdctl/command/cluster_health.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "os" + "os/signal" "sort" "time" @@ -15,74 +16,104 @@ import ( func NewClusterHealthCommand() cli.Command { return cli.Command{ - Name: "cluster-health", - Usage: "check the health of the etcd cluster", - Flags: []cli.Flag{}, + Name: "cluster-health", + Usage: "check the health of the etcd cluster", + Flags: []cli.Flag{ + cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"}, + }, Action: handleClusterHealth, } } func handleClusterHealth(c *cli.Context) { + forever := c.Bool("forever") + if forever { + sigch := make(chan os.Signal, 1) + signal.Notify(sigch, os.Interrupt) + + go func() { + <-sigch + os.Exit(0) + }() + } + tr, err := getTransport(c) if err != nil { handleError(ExitServerError, err) } + // TODO: update members when forever is set. mi := mustNewMembersAPI(c) ms, err := mi.List(context.TODO()) if err != nil { + fmt.Println("cluster may be unhealthy: failed to list members") handleError(ExitServerError, err) } - cl := make([]string, 0) for _, m := range ms { cl = append(cl, m.ClientURLs...) } - // check the /health endpoint of all members first + for { + // check the /health endpoint of all members first - ep, rs0, err := getLeaderStatus(tr, cl) - if err != nil { - fmt.Println("cluster may be unhealthy: failed to connect", cl) - os.Exit(1) - } - - time.Sleep(time.Second) - - // are all the members makeing progress? - _, rs1, err := getLeaderStatus(tr, []string{ep}) - if err != nil { - fmt.Println("cluster is unhealthy") - os.Exit(1) - } - - if rs1.Commit > rs0.Commit { - fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit) - } else { - fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit) - } - fmt.Printf("leader is %v\n", rs0.Lead) - - var prints []string - - for id, pr0 := range rs0.Progress { - pr1, ok := rs1.Progress[id] - if !ok { - fmt.Println("Cluster configuration changed during health checking. Please retry.") + ep, rs0, err := getLeaderStatus(tr, cl) + if err != nil { + fmt.Println("cluster may be unhealthy: failed to connect", cl) + if forever { + time.Sleep(10 * time.Second) + continue + } os.Exit(1) } - if pr1.Match <= pr0.Match { - prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match)) - } else { - prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match)) - } - } - sort.Strings(prints) - for _, p := range prints { - fmt.Print(p) + time.Sleep(time.Second) + + // are all the members makeing progress? + _, rs1, err := getLeaderStatus(tr, []string{ep}) + if err != nil { + fmt.Println("cluster is unhealthy") + if forever { + time.Sleep(10 * time.Second) + continue + } + os.Exit(1) + } + + if rs1.Commit > rs0.Commit { + fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit) + } else { + fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit) + } + fmt.Printf("leader is %v\n", rs0.Lead) + + var prints []string + + for id, pr0 := range rs0.Progress { + pr1, ok := rs1.Progress[id] + if !ok { + // TODO: forever should handle configuration change. + fmt.Println("Cluster configuration changed during health checking. Please retry.") + os.Exit(1) + } + if pr1.Match <= pr0.Match { + prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match)) + } else { + prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match)) + } + } + + sort.Strings(prints) + for _, p := range prints { + fmt.Print(p) + } + + if !forever { + return + } + + time.Sleep(10 * time.Second) } - os.Exit(0) } type raftStatus struct {