mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00

We made a mistake on the health endpoint by returning a string "true". We have to make the etcdctl works for the next version of etcd which will correct the mistake on the server side. It is too late to change the server side right now since we already released a version of etcdctl that only understands "true".
117 lines
2.6 KiB
Go
117 lines
2.6 KiB
Go
package command
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"time"
|
|
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
|
|
"github.com/coreos/etcd/client"
|
|
)
|
|
|
|
func NewClusterHealthCommand() cli.Command {
|
|
return cli.Command{
|
|
Name: "cluster-health",
|
|
Usage: "check the health of the etcd cluster",
|
|
Flags: []cli.Flag{
|
|
cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
|
|
},
|
|
Action: handleClusterHealth,
|
|
}
|
|
}
|
|
|
|
func handleClusterHealth(c *cli.Context) {
|
|
forever := c.Bool("forever")
|
|
if forever {
|
|
sigch := make(chan os.Signal, 1)
|
|
signal.Notify(sigch, os.Interrupt)
|
|
|
|
go func() {
|
|
<-sigch
|
|
os.Exit(0)
|
|
}()
|
|
}
|
|
|
|
tr, err := getTransport(c)
|
|
if err != nil {
|
|
handleError(ExitServerError, err)
|
|
}
|
|
|
|
hc := http.Client{
|
|
Transport: tr,
|
|
}
|
|
|
|
cln := mustNewClientNoSync(c)
|
|
mi := client.NewMembersAPI(cln)
|
|
ms, err := mi.List(context.TODO())
|
|
if err != nil {
|
|
fmt.Println("cluster may be unhealthy: failed to list members")
|
|
handleError(ExitServerError, err)
|
|
}
|
|
|
|
for {
|
|
health := false
|
|
for _, m := range ms {
|
|
if len(m.ClientURLs) == 0 {
|
|
fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
|
|
continue
|
|
}
|
|
|
|
checked := false
|
|
for _, url := range m.ClientURLs {
|
|
resp, err := hc.Get(url + "/health")
|
|
if err != nil {
|
|
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
|
|
continue
|
|
}
|
|
|
|
result := struct{ Health string }{}
|
|
nresult := struct{ Health bool }{}
|
|
bytes, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
|
|
continue
|
|
}
|
|
resp.Body.Close()
|
|
|
|
err = json.Unmarshal(bytes, &result)
|
|
if err != nil {
|
|
err = json.Unmarshal(bytes, &nresult)
|
|
}
|
|
if err != nil {
|
|
fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
|
|
continue
|
|
}
|
|
|
|
checked = true
|
|
if result.Health == "true" {
|
|
health = true
|
|
fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
|
|
} else {
|
|
fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
|
|
}
|
|
break
|
|
}
|
|
if !checked {
|
|
fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
|
|
}
|
|
}
|
|
if health {
|
|
fmt.Println("cluster is healthy")
|
|
} else {
|
|
fmt.Println("cluster is unhealthy")
|
|
}
|
|
|
|
if !forever {
|
|
break
|
|
}
|
|
fmt.Printf("\nnext check after 10 second...\n\n")
|
|
time.Sleep(10 * time.Second)
|
|
}
|
|
}
|