etcdctl: cluster-health supports forever flag

The cluster-health command now accepts a --forever flag that repeats the health check every 10 seconds until interrupted with CTRL+C.
2024-09-27 06:25:44 +00:00 · 2015-07-30 10:01:47 +08:00 · 2015-07-30 10:01:47 +08:00 · f7f00b0af6
commit f7f00b0af6
parent 219ed1695b
1 changed files with 74 additions and 43 deletions
--- a/etcdctl/command/cluster_health.go
+++ b/etcdctl/command/cluster_health.go
@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net/http"
 	"os"
+	"os/signal"
 	"sort"
 	"time"

@ -15,74 +16,104 @@ import (

 func NewClusterHealthCommand() cli.Command {
 	return cli.Command{
-		Name:   "cluster-health",
-		Usage:  "check the health of the etcd cluster",
-		Flags:  []cli.Flag{},
+		Name:  "cluster-health",
+		Usage: "check the health of the etcd cluster",
+		Flags: []cli.Flag{
+			cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
+		},
 		Action: handleClusterHealth,
 	}
 }

 func handleClusterHealth(c *cli.Context) {
+	forever := c.Bool("forever")
+	if forever {
+		sigch := make(chan os.Signal, 1)
+		signal.Notify(sigch, os.Interrupt)
+
+		go func() {
+			<-sigch
+			os.Exit(0)
+		}()
+	}
+
 	tr, err := getTransport(c)
 	if err != nil {
 		handleError(ExitServerError, err)
 	}

+	// TODO: update members when forever is set.
 	mi := mustNewMembersAPI(c)
 	ms, err := mi.List(context.TODO())
 	if err != nil {
+		fmt.Println("cluster may be unhealthy: failed to list members")
 		handleError(ExitServerError, err)
 	}
-
 	cl := make([]string, 0)
 	for _, m := range ms {
 		cl = append(cl, m.ClientURLs...)
 	}

-	// check the /health endpoint of all members first
+	for {
+		// check the /health endpoint of all members first

-	ep, rs0, err := getLeaderStatus(tr, cl)
-	if err != nil {
-		fmt.Println("cluster may be unhealthy: failed to connect", cl)
-		os.Exit(1)
-	}
-
-	time.Sleep(time.Second)
-
-	// are all the members makeing progress?
-	_, rs1, err := getLeaderStatus(tr, []string{ep})
-	if err != nil {
-		fmt.Println("cluster is unhealthy")
-		os.Exit(1)
-	}
-
-	if rs1.Commit > rs0.Commit {
-		fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
-	} else {
-		fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
-	}
-	fmt.Printf("leader is %v\n", rs0.Lead)
-
-	var prints []string
-
-	for id, pr0 := range rs0.Progress {
-		pr1, ok := rs1.Progress[id]
-		if !ok {
-			fmt.Println("Cluster configuration changed during health checking. Please retry.")
+		ep, rs0, err := getLeaderStatus(tr, cl)
+		if err != nil {
+			fmt.Println("cluster may be unhealthy: failed to connect", cl)
+			if forever {
+				time.Sleep(10 * time.Second)
+				continue
+			}
 			os.Exit(1)
 		}
-		if pr1.Match <= pr0.Match {
-			prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
-		} else {
-			prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
-		}
-	}

-	sort.Strings(prints)
-	for _, p := range prints {
-		fmt.Print(p)
+		time.Sleep(time.Second)
+
+		// are all the members makeing progress?
+		_, rs1, err := getLeaderStatus(tr, []string{ep})
+		if err != nil {
+			fmt.Println("cluster is unhealthy")
+			if forever {
+				time.Sleep(10 * time.Second)
+				continue
+			}
+			os.Exit(1)
+		}
+
+		if rs1.Commit > rs0.Commit {
+			fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
+		} else {
+			fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
+		}
+		fmt.Printf("leader is %v\n", rs0.Lead)
+
+		var prints []string
+
+		for id, pr0 := range rs0.Progress {
+			pr1, ok := rs1.Progress[id]
+			if !ok {
+				// TODO: forever should handle configuration change.
+				fmt.Println("Cluster configuration changed during health checking. Please retry.")
+				os.Exit(1)
+			}
+			if pr1.Match <= pr0.Match {
+				prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
+			} else {
+				prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
+			}
+		}
+
+		sort.Strings(prints)
+		for _, p := range prints {
+			fmt.Print(p)
+		}
+
+		if !forever {
+			return
+		}
+
+		time.Sleep(10 * time.Second)
 	}
-	os.Exit(0)
 }

 type raftStatus struct {