etcd/etcdctl/command/cluster_health.go
Xiang Li 04539c6240 etcdctl: prepare for health endpoint change
We made a mistake on the health endpoint by returning a string "true".
We have to make the etcdctl works for the next version of etcd which
will correct the mistake on the server side.

It is too late to change the server side right now since we already
released a version of etcdctl that only understands "true".
2015-09-04 10:20:24 -07:00

117 lines
2.6 KiB
Go

package command
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"os/signal"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
"github.com/coreos/etcd/client"
)
func NewClusterHealthCommand() cli.Command {
return cli.Command{
Name: "cluster-health",
Usage: "check the health of the etcd cluster",
Flags: []cli.Flag{
cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
},
Action: handleClusterHealth,
}
}
func handleClusterHealth(c *cli.Context) {
forever := c.Bool("forever")
if forever {
sigch := make(chan os.Signal, 1)
signal.Notify(sigch, os.Interrupt)
go func() {
<-sigch
os.Exit(0)
}()
}
tr, err := getTransport(c)
if err != nil {
handleError(ExitServerError, err)
}
hc := http.Client{
Transport: tr,
}
cln := mustNewClientNoSync(c)
mi := client.NewMembersAPI(cln)
ms, err := mi.List(context.TODO())
if err != nil {
fmt.Println("cluster may be unhealthy: failed to list members")
handleError(ExitServerError, err)
}
for {
health := false
for _, m := range ms {
if len(m.ClientURLs) == 0 {
fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
continue
}
checked := false
for _, url := range m.ClientURLs {
resp, err := hc.Get(url + "/health")
if err != nil {
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
continue
}
result := struct{ Health string }{}
nresult := struct{ Health bool }{}
bytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
continue
}
resp.Body.Close()
err = json.Unmarshal(bytes, &result)
if err != nil {
err = json.Unmarshal(bytes, &nresult)
}
if err != nil {
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
continue
}
checked = true
if result.Health == "true" {
health = true
fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
} else {
fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
}
break
}
if !checked {
fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
}
}
if health {
fmt.Println("cluster is healthy")
} else {
fmt.Println("cluster is unhealthy")
}
if !forever {
break
}
fmt.Printf("\nnext check after 10 second...\n\n")
time.Sleep(10 * time.Second)
}
}