etcdctl: cluster-health supports forever flag

The cluster-health command now accepts a `forever` flag: when set, it
re-checks the cluster health every 10 seconds until interrupted with CTRL+C.
This commit is contained in:
Xiang Li 2015-07-30 10:01:47 +08:00
parent 219ed1695b
commit f7f00b0af6

View File

@ -6,6 +6,7 @@ import (
"fmt"
"net/http"
"os"
"os/signal"
"sort"
"time"
@ -15,74 +16,104 @@ import (
// NewClusterHealthCommand returns the cli.Command describing the
// "cluster-health" subcommand; its Action is handleClusterHealth.
//
// NOTE(review): this span comes from a rendered diff, so the Name/Usage/Flags
// fields appear twice. Presumably the first set (empty Flags) is the
// pre-change version and the second set (with the "forever" bool flag) is the
// post-change version — confirm against the real file.
func NewClusterHealthCommand() cli.Command {
return cli.Command{
Name: "cluster-health",
Usage: "check the health of the etcd cluster",
Flags: []cli.Flag{},
Name: "cluster-health",
Usage: "check the health of the etcd cluster",
Flags: []cli.Flag{
cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
},
Action: handleClusterHealth,
}
}
// handleClusterHealth is the Action of the "cluster-health" command.
//
// It lists the cluster members, collects their client URLs, and then samples
// the leader's raft status twice, one second apart. The cluster is reported
// healthy when the commit index advanced between the two samples, and each
// member is reported healthy when its match index advanced.
//
// When the "forever" flag is set, the check repeats every 10 seconds and a
// connection failure is retried instead of terminating the process; an
// interrupt signal ends the process with exit status 0.
//
// NOTE(review): this span comes from a rendered diff with the +/- markers
// stripped, so pre-change and post-change lines are interleaved (the loop
// body appears twice). Confirm against the real file before relying on the
// exact statement order shown here.
func handleClusterHealth(c *cli.Context) {
forever := c.Bool("forever")
if forever {
// Exit cleanly (status 0) on CTRL+C while looping forever.
// The channel is buffered because signal.Notify does not block when
// delivering: an unbuffered channel could miss the signal.
sigch := make(chan os.Signal, 1)
signal.Notify(sigch, os.Interrupt)
go func() {
<-sigch
os.Exit(0)
}()
}
tr, err := getTransport(c)
if err != nil {
// presumably handleError terminates the process — verify against its definition
handleError(ExitServerError, err)
}
// TODO: update members when forever is set.
mi := mustNewMembersAPI(c)
ms, err := mi.List(context.TODO())
if err != nil {
fmt.Println("cluster may be unhealthy: failed to list members")
handleError(ExitServerError, err)
}
// Gather the client URLs of every listed member; these are the endpoints
// handed to getLeaderStatus for the first sample.
cl := make([]string, 0)
for _, m := range ms {
cl = append(cl, m.ClientURLs...)
}
// check the /health endpoint of all members first
for {
// check the /health endpoint of all members first
// First sample: ep is the endpoint that answered, rs0 its raft status.
ep, rs0, err := getLeaderStatus(tr, cl)
if err != nil {
fmt.Println("cluster may be unhealthy: failed to connect", cl)
os.Exit(1)
}
// Wait one second so the second sample can show progress.
time.Sleep(time.Second)
// are all the members making progress?
// Second sample is taken from the same endpoint as the first.
_, rs1, err := getLeaderStatus(tr, []string{ep})
if err != nil {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
// Healthy only if the commit index strictly increased between samples.
if rs1.Commit > rs0.Commit {
fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
} else {
fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
}
fmt.Printf("leader is %v\n", rs0.Lead)
var prints []string
for id, pr0 := range rs0.Progress {
pr1, ok := rs1.Progress[id]
if !ok {
fmt.Println("Cluster configuration changed during health checking. Please retry.")
// NOTE(review): from here the rendered diff appears to switch to the
// post-change loop body (same steps, plus retry handling for forever).
ep, rs0, err := getLeaderStatus(tr, cl)
if err != nil {
fmt.Println("cluster may be unhealthy: failed to connect", cl)
// In forever mode, wait 10 seconds and try again instead of exiting.
if forever {
time.Sleep(10 * time.Second)
continue
}
os.Exit(1)
}
if pr1.Match <= pr0.Match {
prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
} else {
prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
}
}
sort.Strings(prints)
for _, p := range prints {
fmt.Print(p)
// Wait one second so the second sample can show progress.
time.Sleep(time.Second)
// are all the members making progress?
_, rs1, err := getLeaderStatus(tr, []string{ep})
if err != nil {
fmt.Println("cluster is unhealthy")
// In forever mode, wait 10 seconds and try again instead of exiting.
if forever {
time.Sleep(10 * time.Second)
continue
}
os.Exit(1)
}
// Healthy only if the commit index strictly increased between samples.
if rs1.Commit > rs0.Commit {
fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
} else {
fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
}
fmt.Printf("leader is %v\n", rs0.Lead)
var prints []string
// Compare each member's match index across the two samples.
for id, pr0 := range rs0.Progress {
pr1, ok := rs1.Progress[id]
if !ok {
// A member present in the first sample is missing from the second.
// TODO: forever should handle configuration change.
fmt.Println("Cluster configuration changed during health checking. Please retry.")
os.Exit(1)
}
// A member is healthy only if its match index strictly increased.
if pr1.Match <= pr0.Match {
prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
} else {
prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
}
}
// Progress is ranged over as a map, so iteration order is not fixed;
// sorting the lines keeps the per-member output in a stable order.
sort.Strings(prints)
for _, p := range prints {
fmt.Print(p)
}
// A single pass is enough unless forever was requested.
if !forever {
return
}
// Pause 10 seconds before the next health check.
time.Sleep(10 * time.Second)
}
// NOTE(review): presumably a leftover pre-change line in the rendered diff —
// confirm whether it is still present (and reachable) in the real file.
os.Exit(0)
}
type raftStatus struct {