mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
143 lines
3.2 KiB
Go
143 lines
3.2 KiB
Go
package command
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
|
|
"github.com/coreos/etcd/etcdserver/stats"
|
|
)
|
|
|
|
func NewClusterHealthCommand() cli.Command {
|
|
return cli.Command{
|
|
Name: "cluster-health",
|
|
Usage: "check the health of the etcd cluster",
|
|
Flags: []cli.Flag{},
|
|
Action: handleClusterHealth,
|
|
}
|
|
}
|
|
|
|
func handleClusterHealth(c *cli.Context) {
|
|
endpoints, err := getEndpoints(c)
|
|
if err != nil {
|
|
handleError(ExitServerError, err)
|
|
}
|
|
tr, err := getTransport(c)
|
|
if err != nil {
|
|
handleError(ExitServerError, err)
|
|
}
|
|
|
|
client := etcd.NewClient(endpoints)
|
|
client.SetTransport(tr)
|
|
|
|
if c.GlobalBool("debug") {
|
|
go dumpCURL(client)
|
|
}
|
|
|
|
if ok := client.SyncCluster(); !ok {
|
|
handleError(ExitBadConnection, errors.New("cannot sync with the cluster using endpoints "+strings.Join(endpoints, ", ")))
|
|
}
|
|
|
|
// do we have a leader?
|
|
cl := client.GetCluster()
|
|
ep, ls0, err := getLeaderStats(tr, cl)
|
|
if err != nil {
|
|
fmt.Println("cluster may be unhealthy: failed to connect", cl)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// is raft stable and making progress?
|
|
client = etcd.NewClient([]string{ep})
|
|
client.SetTransport(tr)
|
|
resp, err := client.Get("/", false, false)
|
|
if err != nil {
|
|
fmt.Println("cluster is unhealthy")
|
|
os.Exit(1)
|
|
}
|
|
rt0, ri0 := resp.RaftTerm, resp.RaftIndex
|
|
time.Sleep(time.Second)
|
|
|
|
resp, err = client.Get("/", false, false)
|
|
if err != nil {
|
|
fmt.Println("cluster is unhealthy")
|
|
os.Exit(1)
|
|
}
|
|
rt1, ri1 := resp.RaftTerm, resp.RaftIndex
|
|
|
|
if rt0 != rt1 {
|
|
fmt.Println("cluster is unhealthy")
|
|
os.Exit(1)
|
|
}
|
|
|
|
if ri1 == ri0 {
|
|
fmt.Println("cluster is unhealthy")
|
|
os.Exit(1)
|
|
}
|
|
|
|
// are all the members makeing progress?
|
|
_, ls1, err := getLeaderStats(tr, []string{ep})
|
|
if err != nil {
|
|
fmt.Println("cluster is unhealthy")
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Println("cluster is healthy")
|
|
// self is healthy
|
|
var prints []string
|
|
|
|
prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
|
|
for name, fs0 := range ls0.Followers {
|
|
fs1, ok := ls1.Followers[name]
|
|
if !ok {
|
|
fmt.Println("Cluster configuration changed during health checking. Please retry.")
|
|
os.Exit(1)
|
|
}
|
|
if fs1.Counts.Success <= fs0.Counts.Success {
|
|
prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
|
|
} else {
|
|
prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
|
|
}
|
|
}
|
|
|
|
sort.Strings(prints)
|
|
for _, p := range prints {
|
|
fmt.Print(p)
|
|
}
|
|
os.Exit(0)
|
|
}
|
|
|
|
func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
|
|
// go-etcd does not support cluster stats, use http client for now
|
|
// TODO: use new etcd client with new member/stats endpoint
|
|
httpclient := http.Client{
|
|
Transport: tr,
|
|
}
|
|
|
|
for _, ep := range endpoints {
|
|
resp, err := httpclient.Get(ep + "/v2/stats/leader")
|
|
if err != nil {
|
|
continue
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
continue
|
|
}
|
|
|
|
ls := &stats.LeaderStats{}
|
|
d := json.NewDecoder(resp.Body)
|
|
err = d.Decode(ls)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
return ep, ls, nil
|
|
}
|
|
return "", nil, errors.New("no leader")
|
|
}
|