etcd/etcdctl/command/cluster_health.go
James Pharaoh 05ecdbc617 etcdtcl: fix etcdctl cluster-health ignores SSL settings
etcdctl reconnects to the leader, but was not picking up ssl settings in this
case, which causes it to show unhealthy when this is not the case.

Fixes #2373
2015-02-25 13:19:07 +01:00

142 lines
3.1 KiB
Go

package command
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
"sort"
"strings"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
"github.com/coreos/etcd/etcdserver/stats"
)
func NewClusterHealthCommand() cli.Command {
return cli.Command{
Name: "cluster-health",
Usage: "check the health of the etcd cluster",
Flags: []cli.Flag{},
Action: handleClusterHealth,
}
}
func handleClusterHealth(c *cli.Context) {
endpoints, err := getEndpoints(c)
if err != nil {
handleError(ErrorFromEtcd, err)
}
tr, err := getTransport(c)
if err != nil {
handleError(ErrorFromEtcd, err)
}
client := etcd.NewClient(endpoints)
client.SetTransport(tr)
if c.GlobalBool("debug") {
go dumpCURL(client)
}
if ok := client.SyncCluster(); !ok {
handleError(FailedToConnectToHost, errors.New("cannot sync with the cluster using endpoints "+strings.Join(endpoints, ", ")))
}
// do we have a leader?
ep, ls0, err := getLeaderStats(tr, client.GetCluster())
if err != nil {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
// is raft stable and making progress?
client = etcd.NewClient([]string{ep})
client.SetTransport(tr)
resp, err := client.Get("/", false, false)
if err != nil {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
rt0, ri0 := resp.RaftTerm, resp.RaftIndex
time.Sleep(time.Second)
resp, err = client.Get("/", false, false)
if err != nil {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
rt1, ri1 := resp.RaftTerm, resp.RaftIndex
if rt0 != rt1 {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
if ri1 == ri0 {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
// are all the members makeing progress?
_, ls1, err := getLeaderStats(tr, []string{ep})
if err != nil {
fmt.Println("cluster is unhealthy")
os.Exit(1)
}
fmt.Println("cluster is healthy")
// self is healthy
var prints []string
prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
for name, fs0 := range ls0.Followers {
fs1, ok := ls1.Followers[name]
if !ok {
fmt.Println("Cluster configuration changed during health checking. Please retry.")
os.Exit(1)
}
if fs1.Counts.Success <= fs0.Counts.Success {
prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
} else {
prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
}
}
sort.Strings(prints)
for _, p := range prints {
fmt.Print(p)
}
os.Exit(0)
}
func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
// go-etcd does not support cluster stats, use http client for now
// TODO: use new etcd client with new member/stats endpoint
httpclient := http.Client{
Transport: tr,
}
for _, ep := range endpoints {
resp, err := httpclient.Get(ep + "/v2/stats/leader")
if err != nil {
continue
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
continue
}
ls := &stats.LeaderStats{}
d := json.NewDecoder(resp.Body)
err = d.Decode(ls)
if err != nil {
continue
}
return ep, ls, nil
}
return "", nil, errors.New("no leader")
}