From 9e69aba7aa8300d5338b1cdae2b57c4af1dc4517 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Wed, 4 Mar 2015 16:21:20 -0800 Subject: [PATCH 1/6] tools/etcd-agent: add main func --- tools/functional-tester/etcd-agent/main.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/functional-tester/etcd-agent/main.go b/tools/functional-tester/etcd-agent/main.go index f5f7042ab..481f84bf6 100644 --- a/tools/functional-tester/etcd-agent/main.go +++ b/tools/functional-tester/etcd-agent/main.go @@ -14,6 +14,21 @@ package main +import ( + "flag" + "log" +) + func main() { - panic("not implemented") + etcdPath := flag.String("etcd-path", "/opt/etcd/bin/etcd", "") + flag.Parse() + + a, err := newAgent(*etcdPath) + if err != nil { + log.Fatal(err) + } + a.serveRPC() + + var done chan struct{} + <-done } From 008bbd2b84c1501d5760142199351ac044ab09f4 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Wed, 4 Mar 2015 16:29:19 -0800 Subject: [PATCH 2/6] tools/etcd-agent: log rpc actions --- tools/functional-tester/etcd-agent/rpc.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/functional-tester/etcd-agent/rpc.go b/tools/functional-tester/etcd-agent/rpc.go index 1c42bae85..de4c2c92a 100644 --- a/tools/functional-tester/etcd-agent/rpc.go +++ b/tools/functional-tester/etcd-agent/rpc.go @@ -32,20 +32,22 @@ func (a *Agent) serveRPC() { } func (a *Agent) RPCStart(args []string, pid *int) error { + log.Printf("rpc: start etcd with args %v", args) err := a.start(args...) if err != nil { return err } - log.Print("start", a.cmd.Args) *pid = a.cmd.Process.Pid return nil } func (a *Agent) RPCStop(args struct{}, reply *struct{}) error { + log.Printf("rpc: stop etcd") return a.stop() } func (a *Agent) RPCRestart(args struct{}, pid *int) error { + log.Printf("rpc: restart etcd") err := a.restart() if err != nil { return err @@ -55,6 +57,7 @@ func (a *Agent) RPCRestart(args struct{}, pid *int) error { } func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error { + log.Printf("rpc: terminate etcd") return a.terminate() } From 061baad611e2eabb759c8ee2f87095927579db5c Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Wed, 4 Mar 2015 16:45:23 -0800 Subject: [PATCH 3/6] tools/etcd-agent: write etcd log into log file --- tools/functional-tester/etcd-agent/agent.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/functional-tester/etcd-agent/agent.go b/tools/functional-tester/etcd-agent/agent.go index 32d996c07..418fd7ab4 100644 --- a/tools/functional-tester/etcd-agent/agent.go +++ b/tools/functional-tester/etcd-agent/agent.go @@ -22,8 +22,9 @@ import ( ) type Agent struct { - cmd *exec.Cmd - l net.Listener + cmd *exec.Cmd + logfile *os.File + l net.Listener } func newAgent(etcd string) (*Agent, error) { @@ -34,12 +35,20 @@ func newAgent(etcd string) (*Agent, error) { } c := exec.Command(etcd) - return &Agent{cmd: c}, nil + + f, err := os.Create("etcd.log") + if err != nil { + return nil, err + } + + return &Agent{cmd: c, logfile: f}, nil } // start starts a new etcd process with the given args. func (a *Agent) start(args ...string) error { a.cmd = exec.Command(a.cmd.Path, args...) + a.cmd.Stdout = a.logfile + a.cmd.Stderr = a.logfile return a.cmd.Start() } @@ -56,6 +65,8 @@ func (a *Agent) stop() error { // restart restarts the stopped etcd process. func (a *Agent) restart() error { a.cmd = exec.Command(a.cmd.Path, a.cmd.Args[1:]...) + a.cmd.Stdout = a.logfile + a.cmd.Stderr = a.logfile return a.cmd.Start() } From 8d3d737993748cbd6124989c2ec498db294b0380 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Wed, 4 Mar 2015 17:16:25 -0800 Subject: [PATCH 4/6] tools/etcd-agent/client: fix rpc Dial --- tools/functional-tester/etcd-agent/client/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/functional-tester/etcd-agent/client/client.go b/tools/functional-tester/etcd-agent/client/client.go index 384337d0b..88f956840 100644 --- a/tools/functional-tester/etcd-agent/client/client.go +++ b/tools/functional-tester/etcd-agent/client/client.go @@ -36,7 +36,7 @@ type agent struct { } func NewAgent(endpoint string) (Agent, error) { - c, err := rpc.Dial("tcp", endpoint) + c, err := rpc.DialHTTP("tcp", endpoint) if err != nil { return nil, err } From 530dd891be678b36624bf969e7bbcf862fe95936 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 5 Mar 2015 00:01:08 -0800 Subject: [PATCH 5/6] tools/etcd-tester: make it work 1. add cluster support 2. add failureNo case 3. add main func --- .../functional-tester/etcd-tester/cluster.go | 143 ++++++++++++++++++ .../functional-tester/etcd-tester/failure.go | 10 +- .../etcd-tester/failure_no.go | 29 ++++ tools/functional-tester/etcd-tester/main.go | 25 ++- tools/functional-tester/etcd-tester/tester.go | 54 ++++--- 5 files changed, 234 insertions(+), 27 deletions(-) create mode 100644 tools/functional-tester/etcd-tester/cluster.go create mode 100644 tools/functional-tester/etcd-tester/failure_no.go diff --git a/tools/functional-tester/etcd-tester/cluster.go b/tools/functional-tester/etcd-tester/cluster.go new file mode 100644 index 000000000..dcbbe7011 --- /dev/null +++ b/tools/functional-tester/etcd-tester/cluster.go @@ -0,0 +1,143 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "math/rand" + "net" + "strings" + "time" + + "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context" + etcdclient "github.com/coreos/etcd/client" + "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client" +) + +type cluster struct { + agentEndpoints []string + datadir string + + Size int + Agents []client.Agent + Names []string + ClientURLs []string +} + +// newCluster starts and returns a new cluster. The caller should call Terminate when finished, to shut it down. +func newCluster(agentEndpoints []string, datadir string) (*cluster, error) { + c := &cluster{ + agentEndpoints: agentEndpoints, + datadir: datadir, + } + if err := c.Bootstrap(); err != nil { + return nil, err + } + return c, nil +} + +func (c *cluster) Bootstrap() error { + size := len(c.agentEndpoints) + + agents := make([]client.Agent, size) + names := make([]string, size) + clientURLs := make([]string, size) + peerURLs := make([]string, size) + members := make([]string, size) + for i, u := range c.agentEndpoints { + var err error + agents[i], err = client.NewAgent(u) + if err != nil { + return err + } + + names[i] = fmt.Sprintf("etcd-%d", i) + + host, _, err := net.SplitHostPort(u) + if err != nil { + return err + } + clientURLs[i] = fmt.Sprintf("http://%s:2379", host) + peerURLs[i] = fmt.Sprintf("http://%s:2380", host) + + members[i] = fmt.Sprintf("%s=%s", names[i], peerURLs[i]) + } + clusterStr := strings.Join(members, ",") + token := fmt.Sprint(rand.Int()) + + for i, a := range agents { + _, err := a.Start( + "-name", names[i], + "-data-dir", c.datadir, + "-advertise-client-urls", clientURLs[i], + "-listen-client-urls", clientURLs[i], + "-initial-advertise-peer-urls", peerURLs[i], + "-listen-peer-urls", peerURLs[i], + "-initial-cluster-token", token, + "-initial-cluster", clusterStr, + "-initial-cluster-state", "new", + ) + if err != nil { + // cleanup + for j := 0; j < i; j++ { + agents[j].Terminate() + } + return err + } + } + + c.Size = size + c.Agents = agents + c.Names = names + c.ClientURLs = clientURLs + return nil +} + +func (c *cluster) WaitHealth() error { + var err error + for i := 0; i < 10; i++ { + err = setHealthKey(c.ClientURLs) + if err == nil { + return nil + } + time.Sleep(time.Second) + } + return err +} + +func (c *cluster) Terminate() { + for _, a := range c.Agents { + a.Terminate() + } +} + +// setHealthKey sets health key on all given urls. +func setHealthKey(us []string) error { + for _, u := range us { + cfg := etcdclient.Config{ + Endpoints: []string{u}, + } + c, err := etcdclient.New(cfg) + if err != nil { + return err + } + kapi := etcdclient.NewKeysAPI(c) + _, err = kapi.Set(context.TODO(), "health", "good", nil) + if err != nil { + return err + } + } + return nil +} diff --git a/tools/functional-tester/etcd-tester/failure.go b/tools/functional-tester/etcd-tester/failure.go index d95d2339a..a4881e70a 100644 --- a/tools/functional-tester/etcd-tester/failure.go +++ b/tools/functional-tester/etcd-tester/failure.go @@ -14,14 +14,16 @@ package main -import "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client" - type failure interface { // inject the failure into the testing cluster - Inject(agents []client.Agent) error + Inject(c *cluster) error // recover the injected failure and wait for the // recovery of the testing cluster - Recover(agents []client.Agent) error + Recover(c *cluster) error // return a description of the failure Desc() string } + +type description string + +func (d description) Desc() string { return string(d) } diff --git a/tools/functional-tester/etcd-tester/failure_no.go b/tools/functional-tester/etcd-tester/failure_no.go new file mode 100644 index 000000000..abd1a89ed --- /dev/null +++ b/tools/functional-tester/etcd-tester/failure_no.go @@ -0,0 +1,29 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +type failureBase struct { + description +} + +func newFailureBase() *failureBase { + return &failureBase{ + description: "do nothing", + } +} + +func (f *failureBase) Inject(c *cluster) error { return nil } + +func (f *failureBase) Recover(c *cluster) error { return nil } diff --git a/tools/functional-tester/etcd-tester/main.go b/tools/functional-tester/etcd-tester/main.go index f5f7042ab..736adbc02 100644 --- a/tools/functional-tester/etcd-tester/main.go +++ b/tools/functional-tester/etcd-tester/main.go @@ -14,6 +14,29 @@ package main +import ( + "flag" + "log" + "strings" +) + func main() { - panic("not implemented") + endpointStr := flag.String("agent-endpoints", ":9027", "") + datadir := flag.String("data-dir", "agent.etcd", "") + limit := flag.Int("limit", 3, "") + flag.Parse() + + endpoints := strings.Split(*endpointStr, ",") + c, err := newCluster(endpoints, *datadir) + if err != nil { + log.Fatal(err) + } + defer c.Terminate() + + t := &tester{ + failures: []failure{newFailureBase()}, + cluster: c, + limit: *limit, + } + t.runLoop() } diff --git a/tools/functional-tester/etcd-tester/tester.go b/tools/functional-tester/etcd-tester/tester.go index b84b497d1..a23cbe9e6 100644 --- a/tools/functional-tester/etcd-tester/tester.go +++ b/tools/functional-tester/etcd-tester/tester.go @@ -14,41 +14,51 @@ package main -import ( - "fmt" - - "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client" -) +import "log" type tester struct { failures []failure - agents []client.Agent + cluster *cluster limit int } func (tt *tester) runLoop() { for i := 0; i < tt.limit; i++ { for j, f := range tt.failures { - fmt.Println("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc()) - fmt.Println("etcd-tester: [round#%d case#%d] start injecting failure...", i, j) - if err := f.Inject(tt.agents); err != nil { - fmt.Println("etcd-tester: [round#%d case#%d] injection failing...", i, j) - tt.cleanup(i, j) + if err := tt.cluster.WaitHealth(); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err) + if err := tt.cleanup(i, j); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) + return + } + continue } - fmt.Println("etcd-tester: [round#%d case#%d] start recovering failure...", i, j) - if err := f.Recover(tt.agents); err != nil { - fmt.Println("etcd-tester: [round#%d case#%d] recovery failing...", i, j) - tt.cleanup(i, j) + log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc()) + log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j) + if err := f.Inject(tt.cluster); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err) + if err := tt.cleanup(i, j); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) + return + } + continue } - fmt.Println("etcd-tester: [round#%d case#%d] succeed!", i, j) + log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j) + if err := f.Recover(tt.cluster); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err) + if err := tt.cleanup(i, j); err != nil { + log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) + return + } + continue + } + log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j) } } } -func (tt *tester) cleanup(i, j int) { - fmt.Println("etcd-tester: [round#%d case#%d] cleaning up...", i, j) - for _, a := range tt.agents { - a.Terminate() - a.Start() - } +func (tt *tester) cleanup(i, j int) error { + log.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j) + tt.cluster.Terminate() + return tt.cluster.Bootstrap() } From d5957aebfd30534d9310799c9c578658c0d621ca Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 5 Mar 2015 00:14:16 -0800 Subject: [PATCH 6/6] tools/etcd-tester: add failure killall --- .../etcd-tester/failure_killall.go | 43 +++++++++++++++++++ tools/functional-tester/etcd-tester/main.go | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tools/functional-tester/etcd-tester/failure_killall.go diff --git a/tools/functional-tester/etcd-tester/failure_killall.go b/tools/functional-tester/etcd-tester/failure_killall.go new file mode 100644 index 000000000..a767937e6 --- /dev/null +++ b/tools/functional-tester/etcd-tester/failure_killall.go @@ -0,0 +1,43 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +type failureKillAll struct { + description +} + +func newFailureKillAll() *failureKillAll { + return &failureKillAll{ + description: "kill all members", + } +} + +func (f *failureKillAll) Inject(c *cluster) error { + for _, a := range c.Agents { + if err := a.Stop(); err != nil { + return err + } + } + return nil +} + +func (f *failureKillAll) Recover(c *cluster) error { + for _, a := range c.Agents { + if _, err := a.Restart(); err != nil { + return err + } + } + return c.WaitHealth() +} diff --git a/tools/functional-tester/etcd-tester/main.go b/tools/functional-tester/etcd-tester/main.go index 736adbc02..708b221d5 100644 --- a/tools/functional-tester/etcd-tester/main.go +++ b/tools/functional-tester/etcd-tester/main.go @@ -34,7 +34,7 @@ func main() { defer c.Terminate() t := &tester{ - failures: []failure{newFailureBase()}, + failures: []failure{newFailureBase(), newFailureKillAll()}, cluster: c, limit: *limit, }