mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdctl: cluster-health supports forever flag
cluster-health command supports checking the cluster health forever.
This commit is contained in:
parent
219ed1695b
commit
f7f00b0af6
@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"os/signal"
|
||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -15,74 +16,104 @@ import (
|
|||||||
|
|
||||||
func NewClusterHealthCommand() cli.Command {
|
func NewClusterHealthCommand() cli.Command {
|
||||||
return cli.Command{
|
return cli.Command{
|
||||||
Name: "cluster-health",
|
Name: "cluster-health",
|
||||||
Usage: "check the health of the etcd cluster",
|
Usage: "check the health of the etcd cluster",
|
||||||
Flags: []cli.Flag{},
|
Flags: []cli.Flag{
|
||||||
|
cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
|
||||||
|
},
|
||||||
Action: handleClusterHealth,
|
Action: handleClusterHealth,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleClusterHealth(c *cli.Context) {
|
func handleClusterHealth(c *cli.Context) {
|
||||||
|
forever := c.Bool("forever")
|
||||||
|
if forever {
|
||||||
|
sigch := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sigch, os.Interrupt)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-sigch
|
||||||
|
os.Exit(0)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
tr, err := getTransport(c)
|
tr, err := getTransport(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(ExitServerError, err)
|
handleError(ExitServerError, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: update members when forever is set.
|
||||||
mi := mustNewMembersAPI(c)
|
mi := mustNewMembersAPI(c)
|
||||||
ms, err := mi.List(context.TODO())
|
ms, err := mi.List(context.TODO())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
fmt.Println("cluster may be unhealthy: failed to list members")
|
||||||
handleError(ExitServerError, err)
|
handleError(ExitServerError, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cl := make([]string, 0)
|
cl := make([]string, 0)
|
||||||
for _, m := range ms {
|
for _, m := range ms {
|
||||||
cl = append(cl, m.ClientURLs...)
|
cl = append(cl, m.ClientURLs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// check the /health endpoint of all members first
|
for {
|
||||||
|
// check the /health endpoint of all members first
|
||||||
|
|
||||||
ep, rs0, err := getLeaderStatus(tr, cl)
|
ep, rs0, err := getLeaderStatus(tr, cl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("cluster may be unhealthy: failed to connect", cl)
|
fmt.Println("cluster may be unhealthy: failed to connect", cl)
|
||||||
os.Exit(1)
|
if forever {
|
||||||
}
|
time.Sleep(10 * time.Second)
|
||||||
|
continue
|
||||||
time.Sleep(time.Second)
|
}
|
||||||
|
|
||||||
// are all the members making progress?
|
|
||||||
_, rs1, err := getLeaderStatus(tr, []string{ep})
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("cluster is unhealthy")
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
if rs1.Commit > rs0.Commit {
|
|
||||||
fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
|
|
||||||
} else {
|
|
||||||
fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
|
|
||||||
}
|
|
||||||
fmt.Printf("leader is %v\n", rs0.Lead)
|
|
||||||
|
|
||||||
var prints []string
|
|
||||||
|
|
||||||
for id, pr0 := range rs0.Progress {
|
|
||||||
pr1, ok := rs1.Progress[id]
|
|
||||||
if !ok {
|
|
||||||
fmt.Println("Cluster configuration changed during health checking. Please retry.")
|
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
if pr1.Match <= pr0.Match {
|
|
||||||
prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
|
|
||||||
} else {
|
|
||||||
prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sort.Strings(prints)
|
time.Sleep(time.Second)
|
||||||
for _, p := range prints {
|
|
||||||
fmt.Print(p)
|
// are all the members making progress?
|
||||||
|
_, rs1, err := getLeaderStatus(tr, []string{ep})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
if forever {
|
||||||
|
time.Sleep(10 * time.Second)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if rs1.Commit > rs0.Commit {
|
||||||
|
fmt.Printf("cluster is healthy: raft is making progress [commit index: %v->%v]\n", rs0.Commit, rs1.Commit)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("cluster is unhealthy: raft is not making progress [commit index: %v]\n", rs0.Commit)
|
||||||
|
}
|
||||||
|
fmt.Printf("leader is %v\n", rs0.Lead)
|
||||||
|
|
||||||
|
var prints []string
|
||||||
|
|
||||||
|
for id, pr0 := range rs0.Progress {
|
||||||
|
pr1, ok := rs1.Progress[id]
|
||||||
|
if !ok {
|
||||||
|
// TODO: forever should handle configuration change.
|
||||||
|
fmt.Println("Cluster configuration changed during health checking. Please retry.")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
if pr1.Match <= pr0.Match {
|
||||||
|
prints = append(prints, fmt.Sprintf("member %s is unhealthy: raft is not making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
|
||||||
|
} else {
|
||||||
|
prints = append(prints, fmt.Sprintf("member %s is healthy: raft is making progress [match: %v->%v]\n", id, pr0.Match, pr1.Match))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(prints)
|
||||||
|
for _, p := range prints {
|
||||||
|
fmt.Print(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !forever {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(10 * time.Second)
|
||||||
}
|
}
|
||||||
os.Exit(0)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type raftStatus struct {
|
type raftStatus struct {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user