mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #2259 from xiang90/healthy
etcdctl: support healthy checking
This commit is contained in:
commit
9c850b7182
140
etcdctl/command/cluster_health.go
Normal file
140
etcdctl/command/cluster_health.go
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
package command
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
|
||||||
|
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
|
||||||
|
"github.com/coreos/etcd/etcdserver/stats"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NewClusterHealthCommand() cli.Command {
|
||||||
|
return cli.Command{
|
||||||
|
Name: "cluster-health",
|
||||||
|
Usage: "check the health of the etcd cluster",
|
||||||
|
Flags: []cli.Flag{},
|
||||||
|
Action: handleClusterHealth,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleClusterHealth(c *cli.Context) {
|
||||||
|
endpoints, err := getEndpoints(c)
|
||||||
|
if err != nil {
|
||||||
|
handleError(ErrorFromEtcd, err)
|
||||||
|
}
|
||||||
|
tr, err := getTransport(c)
|
||||||
|
if err != nil {
|
||||||
|
handleError(ErrorFromEtcd, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
client := etcd.NewClient(endpoints)
|
||||||
|
client.SetTransport(tr)
|
||||||
|
|
||||||
|
if c.GlobalBool("debug") {
|
||||||
|
go dumpCURL(client)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ok := client.SyncCluster(); !ok {
|
||||||
|
handleError(FailedToConnectToHost, errors.New("cannot sync with the cluster using endpoints "+strings.Join(endpoints, ", ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
// do we have a leader?
|
||||||
|
ep, ls0, err := getLeaderStats(tr, client.GetCluster())
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// is raft stable and making progress?
|
||||||
|
client = etcd.NewClient([]string{ep})
|
||||||
|
resp, err := client.Get("/", false, false)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rt0, ri0 := resp.RaftTerm, resp.RaftIndex
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
|
||||||
|
resp, err = client.Get("/", false, false)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rt1, ri1 := resp.RaftTerm, resp.RaftIndex
|
||||||
|
|
||||||
|
if rt0 != rt1 {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ri1 == ri0 {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// are all the members makeing progress?
|
||||||
|
_, ls1, err := getLeaderStats(tr, []string{ep})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("cluster is unhealthy")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("cluster is healthy")
|
||||||
|
// self is healthy
|
||||||
|
var prints []string
|
||||||
|
|
||||||
|
prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
|
||||||
|
for name, fs0 := range ls0.Followers {
|
||||||
|
fs1, ok := ls1.Followers[name]
|
||||||
|
if !ok {
|
||||||
|
fmt.Println("Cluster configuration changed during health checking. Please retry.")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
if fs1.Counts.Success <= fs0.Counts.Success {
|
||||||
|
prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
|
||||||
|
} else {
|
||||||
|
prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(prints)
|
||||||
|
for _, p := range prints {
|
||||||
|
fmt.Print(p)
|
||||||
|
}
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
|
||||||
|
// go-etcd does not support cluster stats, use http client for now
|
||||||
|
// TODO: use new etcd client with new member/stats endpoint
|
||||||
|
httpclient := http.Client{
|
||||||
|
Transport: tr,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ep := range endpoints {
|
||||||
|
resp, err := httpclient.Get(ep + "/v2/stats/leader")
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ls := &stats.LeaderStats{}
|
||||||
|
d := json.NewDecoder(resp.Body)
|
||||||
|
err = d.Decode(ls)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return ep, ls, nil
|
||||||
|
}
|
||||||
|
return "", nil, errors.New("no leader")
|
||||||
|
}
|
@ -39,6 +39,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
app.Commands = []cli.Command{
|
app.Commands = []cli.Command{
|
||||||
command.NewBackupCommand(),
|
command.NewBackupCommand(),
|
||||||
|
command.NewClusterHealthCommand(),
|
||||||
command.NewMakeCommand(),
|
command.NewMakeCommand(),
|
||||||
command.NewMakeDirCommand(),
|
command.NewMakeDirCommand(),
|
||||||
command.NewRemoveCommand(),
|
command.NewRemoveCommand(),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user