etcd/etcdctl/command/cluster_health.go
Yicheng Qin 423e3bbbd8 etcdctl/cluster_health: provide better message for empty client urls
It skips sync when init client, and prints out unreachable messagen and
points to notice when checking health of etcd members one by one.
2015-09-01 14:42:19 -07:00

107 lines
2.4 KiB
Go

package command
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/signal"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
"github.com/coreos/etcd/client"
)
func NewClusterHealthCommand() cli.Command {
return cli.Command{
Name: "cluster-health",
Usage: "check the health of the etcd cluster",
Flags: []cli.Flag{
cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
},
Action: handleClusterHealth,
}
}
func handleClusterHealth(c *cli.Context) {
forever := c.Bool("forever")
if forever {
sigch := make(chan os.Signal, 1)
signal.Notify(sigch, os.Interrupt)
go func() {
<-sigch
os.Exit(0)
}()
}
tr, err := getTransport(c)
if err != nil {
handleError(ExitServerError, err)
}
hc := http.Client{
Transport: tr,
}
cln := mustNewClientNoSync(c)
mi := client.NewMembersAPI(cln)
ms, err := mi.List(context.TODO())
if err != nil {
fmt.Println("cluster may be unhealthy: failed to list members")
handleError(ExitServerError, err)
}
for {
health := false
for _, m := range ms {
if len(m.ClientURLs) == 0 {
fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
continue
}
checked := false
for _, url := range m.ClientURLs {
resp, err := hc.Get(url + "/health")
if err != nil {
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
continue
}
result := struct{ Health string }{}
d := json.NewDecoder(resp.Body)
err = d.Decode(&result)
resp.Body.Close()
if err != nil {
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
continue
}
checked = true
if result.Health == "true" {
health = true
fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
} else {
fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
}
break
}
if !checked {
fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
}
}
if health {
fmt.Println("cluster is healthy")
} else {
fmt.Println("cluster is unhealthy")
}
if !forever {
break
}
fmt.Printf("\nnext check after 10 second...\n\n")
time.Sleep(10 * time.Second)
}
}