mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdctl: support healthy checking
This commit is contained in:
parent
c94db98177
commit
7bbdad9068
140
etcdctl/command/cluster_health.go
Normal file
140
etcdctl/command/cluster_health.go
Normal file
@ -0,0 +1,140 @@
|
||||
package command
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
|
||||
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
|
||||
"github.com/coreos/etcd/etcdserver/stats"
|
||||
)
|
||||
|
||||
func NewClusterHealthCommand() cli.Command {
|
||||
return cli.Command{
|
||||
Name: "cluster-health",
|
||||
Usage: "check the health of the etcd cluster",
|
||||
Flags: []cli.Flag{},
|
||||
Action: handleClusterHealth,
|
||||
}
|
||||
}
|
||||
|
||||
func handleClusterHealth(c *cli.Context) {
|
||||
endpoints, err := getEndpoints(c)
|
||||
if err != nil {
|
||||
handleError(ErrorFromEtcd, err)
|
||||
}
|
||||
tr, err := getTransport(c)
|
||||
if err != nil {
|
||||
handleError(ErrorFromEtcd, err)
|
||||
}
|
||||
|
||||
client := etcd.NewClient(endpoints)
|
||||
client.SetTransport(tr)
|
||||
|
||||
if c.GlobalBool("debug") {
|
||||
go dumpCURL(client)
|
||||
}
|
||||
|
||||
if ok := client.SyncCluster(); !ok {
|
||||
handleError(FailedToConnectToHost, errors.New("cannot sync with the cluster using endpoints "+strings.Join(endpoints, ", ")))
|
||||
}
|
||||
|
||||
// do we have a leader?
|
||||
ep, ls0, err := getLeaderStats(tr, client.GetCluster())
|
||||
if err != nil {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// is raft stable and making progress?
|
||||
client = etcd.NewClient([]string{ep})
|
||||
resp, err := client.Get("/", false, false)
|
||||
if err != nil {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
rt0, ri0 := resp.RaftTerm, resp.RaftIndex
|
||||
time.Sleep(time.Second)
|
||||
|
||||
resp, err = client.Get("/", false, false)
|
||||
if err != nil {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
rt1, ri1 := resp.RaftTerm, resp.RaftIndex
|
||||
|
||||
if rt0 != rt1 {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if ri1 == ri0 {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// are all the members makeing progress?
|
||||
_, ls1, err := getLeaderStats(tr, []string{ep})
|
||||
if err != nil {
|
||||
fmt.Println("cluster is unhealthy")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Println("cluster is healthy")
|
||||
// self is healthy
|
||||
var prints []string
|
||||
|
||||
prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
|
||||
for name, fs0 := range ls0.Followers {
|
||||
fs1, ok := ls1.Followers[name]
|
||||
if !ok {
|
||||
fmt.Println("Cluster configuration changed during health checking. Please retry.")
|
||||
os.Exit(1)
|
||||
}
|
||||
if fs1.Counts.Success <= fs0.Counts.Success {
|
||||
prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
|
||||
} else {
|
||||
prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(prints)
|
||||
for _, p := range prints {
|
||||
fmt.Print(p)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
|
||||
// go-etcd does not support cluster stats, use http client for now
|
||||
// TODO: use new etcd client with new member/stats endpoint
|
||||
httpclient := http.Client{
|
||||
Transport: tr,
|
||||
}
|
||||
|
||||
for _, ep := range endpoints {
|
||||
resp, err := httpclient.Get(ep + "/v2/stats/leader")
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
continue
|
||||
}
|
||||
|
||||
ls := &stats.LeaderStats{}
|
||||
d := json.NewDecoder(resp.Body)
|
||||
err = d.Decode(ls)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
return ep, ls, nil
|
||||
}
|
||||
return "", nil, errors.New("no leader")
|
||||
}
|
@ -39,6 +39,7 @@ func main() {
|
||||
}
|
||||
app.Commands = []cli.Command{
|
||||
command.NewBackupCommand(),
|
||||
command.NewClusterHealthCommand(),
|
||||
command.NewMakeCommand(),
|
||||
command.NewMakeDirCommand(),
|
||||
command.NewRemoveCommand(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user