From 2bfd266a818540eea682d19797a25647d649d698 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Tue, 3 Mar 2015 16:51:45 -0800 Subject: [PATCH] tools/functional-tester: inital commit --- tools/functional-tester/README.md | 17 +++ tools/functional-tester/etcd-agent/agent.go | 77 +++++++++++ .../etcd-agent/agent_test.go | 84 ++++++++++++ .../etcd-agent/client/client.go | 78 +++++++++++ tools/functional-tester/etcd-agent/main.go | 19 +++ tools/functional-tester/etcd-agent/rpc.go | 63 +++++++++ .../functional-tester/etcd-agent/rpc_test.go | 125 ++++++++++++++++++ .../functional-tester/etcd-tester/failure.go | 27 ++++ tools/functional-tester/etcd-tester/main.go | 19 +++ tools/functional-tester/etcd-tester/tester.go | 54 ++++++++ 10 files changed, 563 insertions(+) create mode 100644 tools/functional-tester/README.md create mode 100644 tools/functional-tester/etcd-agent/agent.go create mode 100644 tools/functional-tester/etcd-agent/agent_test.go create mode 100644 tools/functional-tester/etcd-agent/client/client.go create mode 100644 tools/functional-tester/etcd-agent/main.go create mode 100644 tools/functional-tester/etcd-agent/rpc.go create mode 100644 tools/functional-tester/etcd-agent/rpc_test.go create mode 100644 tools/functional-tester/etcd-tester/failure.go create mode 100644 tools/functional-tester/etcd-tester/main.go create mode 100644 tools/functional-tester/etcd-tester/tester.go diff --git a/tools/functional-tester/README.md b/tools/functional-tester/README.md new file mode 100644 index 000000000..1da36687f --- /dev/null +++ b/tools/functional-tester/README.md @@ -0,0 +1,17 @@ +# etcd functional test suite + +etcd functional test suite tests the functionality of an etcd cluster with a focus on failure resistance under high pressure. It sets up an etcd cluster and injects failures into the cluster by killing the process or isolating the network of the process. It expects the etcd cluster to recover within a short amount of time after fixing the fault. 
+ +etcd functional test suite has two components: etcd-agent and etcd-tester. etcd-agent runs on every test machine and etcd-tester is a single controller of the test. etcd-tester controls all the etcd-agents to start etcd clusters and simulate various failure cases. + +## requirements + +The environment of the cluster must be stable enough, so etcd test suite can assume that most of the failures are generated by itself. + +## etcd agent + +etcd agent is a daemon on each machine. It can start, stop, restart, isolate and terminate an etcd process. The agent exposes this functionality via HTTP RPC. + +## etcd tester + +etcd functional tester controls the progress of the functional tests. It calls the RPC of the etcd agent to simulate various test cases. For example, it can start a three-member cluster by sending three start RPC calls to three different etcd agents. It can make one of the members fail by sending a stop RPC call to one etcd agent. \ No newline at end of file diff --git a/tools/functional-tester/etcd-agent/agent.go b/tools/functional-tester/etcd-agent/agent.go new file mode 100644 index 000000000..32d996c07 --- /dev/null +++ b/tools/functional-tester/etcd-agent/agent.go @@ -0,0 +1,77 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "net" + "os" + "os/exec" + "path" +) + +type Agent struct { + cmd *exec.Cmd + l net.Listener +} + +func newAgent(etcd string) (*Agent, error) { + // check if the file exists + _, err := os.Stat(etcd) + if err != nil { + return nil, err + } + + c := exec.Command(etcd) + return &Agent{cmd: c}, nil +} + +// start starts a new etcd process with the given args. +func (a *Agent) start(args ...string) error { + a.cmd = exec.Command(a.cmd.Path, args...) + return a.cmd.Start() +} + +// stop stops the existing etcd process the agent started. +func (a *Agent) stop() error { + err := a.cmd.Process.Kill() + if err != nil { + return err + } + _, err = a.cmd.Process.Wait() + return err +} + +// restart restarts the stopped etcd process. +func (a *Agent) restart() error { + a.cmd = exec.Command(a.cmd.Path, a.cmd.Args[1:]...) + return a.cmd.Start() +} + +// terminate stops the existing etcd process the agent started +// and removes the data dir. +func (a *Agent) terminate() error { + a.cmd.Process.Kill() + args := a.cmd.Args + + datadir := path.Join(a.cmd.Path, "*.etcd") + // only parse the simple case like "-data-dir /var/lib/etcd" + for i, arg := range args { + if arg == "-data-dir" && i+1 < len(args) { + datadir = args[i+1] + break + } + } + return os.RemoveAll(datadir) +} diff --git a/tools/functional-tester/etcd-agent/agent_test.go b/tools/functional-tester/etcd-agent/agent_test.go new file mode 100644 index 000000000..031c14241 --- /dev/null +++ b/tools/functional-tester/etcd-agent/agent_test.go @@ -0,0 +1,84 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "io/ioutil" + "os" + "testing" +) + +const etcdPath = "./etcd" + +func TestAgentStart(t *testing.T) { + a, dir := newTestAgent(t) + defer a.terminate() + + err := a.start("-data-dir", dir) + if err != nil { + t.Fatal(err) + } +} + +func TestAgentRestart(t *testing.T) { + a, dir := newTestAgent(t) + defer a.terminate() + + err := a.start("-data-dir", dir) + if err != nil { + t.Fatal(err) + } + + err = a.stop() + if err != nil { + t.Fatal(err) + } + err = a.restart() + if err != nil { + t.Fatal(err) + } +} + +func TestAgentTerminate(t *testing.T) { + a, dir := newTestAgent(t) + + err := a.start("-data-dir", dir) + if err != nil { + t.Fatal(err) + } + + err = a.terminate() + if err != nil { + t.Fatal(err) + } + + if _, err := os.Stat(dir); !os.IsNotExist(err) { + t.Fatal(err) + } +} + +// newTestAgent creates a test agent with a temp data directory. +func newTestAgent(t *testing.T) (*Agent, string) { + a, err := newAgent(etcdPath) + if err != nil { + t.Fatal(err) + } + + dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent") + if err != nil { + t.Fatal(err) + } + return a, dir +} diff --git a/tools/functional-tester/etcd-agent/client/client.go b/tools/functional-tester/etcd-agent/client/client.go new file mode 100644 index 000000000..384337d0b --- /dev/null +++ b/tools/functional-tester/etcd-agent/client/client.go @@ -0,0 +1,78 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package client + +import "net/rpc" + +type Agent interface { + ID() uint64 + // Start starts a new etcd with the given args on the agent machine. + Start(args ...string) (int, error) + // Stop stops the existing etcd the agent started. + Stop() error + // Restart restarts the existing etcd the agent stopped. + Restart() (int, error) + // Terminate stops the existing etcd the agent started and removes its data dir. + Terminate() error + // Isolate isolates the network of etcd + Isolate() error +} + +type agent struct { + endpoint string + rpcClient *rpc.Client +} + +func NewAgent(endpoint string) (Agent, error) { + c, err := rpc.DialHTTP("tcp", endpoint) + if err != nil { + return nil, err + } + return &agent{endpoint, c}, nil +} + +func (a *agent) Start(args ...string) (int, error) { + var pid int + err := a.rpcClient.Call("Agent.RPCStart", args, &pid) + if err != nil { + return -1, err + } + return pid, nil +} + +func (a *agent) Stop() error { + return a.rpcClient.Call("Agent.RPCStop", struct{}{}, nil) +} + +func (a *agent) Restart() (int, error) { + var pid int + err := a.rpcClient.Call("Agent.RPCRestart", struct{}{}, &pid) + if err != nil { + return -1, err + } + return pid, nil +} + +func (a *agent) Terminate() error { + return a.rpcClient.Call("Agent.RPCTerminate", struct{}{}, nil) +} + +func (a *agent) Isolate() error { + panic("not implemented") +} + +func (a *agent) ID() uint64 { + panic("not implemented") +} diff --git a/tools/functional-tester/etcd-agent/main.go b/tools/functional-tester/etcd-agent/main.go new file mode 100644 index 
000000000..f5f7042ab --- /dev/null +++ b/tools/functional-tester/etcd-agent/main.go @@ -0,0 +1,19 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +func main() { + panic("not implemented") +} diff --git a/tools/functional-tester/etcd-agent/rpc.go b/tools/functional-tester/etcd-agent/rpc.go new file mode 100644 index 000000000..1c42bae85 --- /dev/null +++ b/tools/functional-tester/etcd-agent/rpc.go @@ -0,0 +1,63 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "net" + "net/http" + "net/rpc" +) + +func (a *Agent) serveRPC() { + rpc.Register(a) + rpc.HandleHTTP() + l, e := net.Listen("tcp", ":9027") + if e != nil { + log.Fatal("agent:", e) + } + go http.Serve(l, nil) +} + +func (a *Agent) RPCStart(args []string, pid *int) error { + err := a.start(args...) 
+ if err != nil { + return err + } + log.Print("start", a.cmd.Args) + *pid = a.cmd.Process.Pid + return nil +} + +func (a *Agent) RPCStop(args struct{}, reply *struct{}) error { + return a.stop() +} + +func (a *Agent) RPCRestart(args struct{}, pid *int) error { + err := a.restart() + if err != nil { + return err + } + *pid = a.cmd.Process.Pid + return nil +} + +func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error { + return a.terminate() +} + +func (a *Agent) RPCIsolate(args struct{}, reply *struct{}) error { + panic("not implemented") +} diff --git a/tools/functional-tester/etcd-agent/rpc_test.go b/tools/functional-tester/etcd-agent/rpc_test.go new file mode 100644 index 000000000..3fb25263e --- /dev/null +++ b/tools/functional-tester/etcd-agent/rpc_test.go @@ -0,0 +1,125 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "io/ioutil" + "log" + "net/rpc" + "os" + "testing" +) + +func init() { + defaultAgent, err := newAgent(etcdPath) + if err != nil { + log.Panic(err) + } + defaultAgent.serveRPC() +} + +func TestRPCStart(t *testing.T) { + c, err := rpc.DialHTTP("tcp", ":9027") + if err != nil { + t.Fatal(err) + } + + dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent") + if err != nil { + t.Fatal(err) + } + var pid int + err = c.Call("Agent.RPCStart", []string{"-data-dir", dir}, &pid) + if err != nil { + t.Fatal(err) + } + defer c.Call("Agent.RPCTerminate", struct{}{}, nil) + + _, err = os.FindProcess(pid) + if err != nil { + t.Errorf("unexpected error %v when find process %d", err, pid) + } +} + +func TestRPCRestart(t *testing.T) { + c, err := rpc.DialHTTP("tcp", ":9027") + if err != nil { + t.Fatal(err) + } + + dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent") + if err != nil { + t.Fatal(err) + } + var pid int + err = c.Call("Agent.RPCStart", []string{"-data-dir", dir}, &pid) + if err != nil { + t.Fatal(err) + } + defer c.Call("Agent.RPCTerminate", struct{}{}, nil) + + err = c.Call("Agent.RPCStop", struct{}{}, nil) + if err != nil { + t.Fatal(err) + } + var npid int + err = c.Call("Agent.RPCRestart", struct{}{}, &npid) + if err != nil { + t.Fatal(err) + } + + if npid == pid { + t.Errorf("pid = %v, want not equal to %d", npid, pid) + } + + s, err := os.FindProcess(pid) + if err != nil { + t.Errorf("unexpected error %v when find process %d", err, pid) + } + _, err = s.Wait() + if err == nil { + t.Errorf("err = nil, want killed error") + } + _, err = os.FindProcess(npid) + if err != nil { + t.Errorf("unexpected error %v when find process %d", err, npid) + } +} + +func TestRPCTerminate(t *testing.T) { + c, err := rpc.DialHTTP("tcp", ":9027") + if err != nil { + t.Fatal(err) + } + + dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent") + if err != nil { + t.Fatal(err) + } + var pid int + err = c.Call("Agent.RPCStart", []string{"-data-dir", dir}, 
&pid) + if err != nil { + t.Fatal(err) + } + + err = c.Call("Agent.RPCTerminate", struct{}{}, nil) + if err != nil { + t.Fatal(err) + } + + if _, err := os.Stat(dir); !os.IsNotExist(err) { + t.Fatal(err) + } +} diff --git a/tools/functional-tester/etcd-tester/failure.go b/tools/functional-tester/etcd-tester/failure.go new file mode 100644 index 000000000..d95d2339a --- /dev/null +++ b/tools/functional-tester/etcd-tester/failure.go @@ -0,0 +1,27 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client" + +type failure interface { + // inject the failure into the testing cluster + Inject(agents []client.Agent) error + // recover the injected failure and wait for the + // recovery of the testing cluster + Recover(agents []client.Agent) error + // return a description of the failure + Desc() string +} diff --git a/tools/functional-tester/etcd-tester/main.go b/tools/functional-tester/etcd-tester/main.go new file mode 100644 index 000000000..f5f7042ab --- /dev/null +++ b/tools/functional-tester/etcd-tester/main.go @@ -0,0 +1,19 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +func main() { + panic("not implemented") +} diff --git a/tools/functional-tester/etcd-tester/tester.go b/tools/functional-tester/etcd-tester/tester.go new file mode 100644 index 000000000..2609418e0 --- /dev/null +++ b/tools/functional-tester/etcd-tester/tester.go @@ -0,0 +1,54 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + + "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client" +) + +type tester struct { + failures []failure + agents []client.Agent + limit uint64 +} + +func (tt *tester) runLoop() { + for i := 0; uint64(i) < tt.limit; i++ { + for j, f := range tt.failures { + fmt.Printf("etcd-tester: [round#%d case#%d] start failure %s\n", i, j, f.Desc()) + fmt.Printf("etcd-tester: [round#%d case#%d] start injecting failure...\n", i, j) + if err := f.Inject(tt.agents); err != nil { + fmt.Printf("etcd-tester: [round#%d case#%d] injection failing...\n", i, j) + tt.cleanup(i, j) + } + fmt.Printf("etcd-tester: [round#%d case#%d] start recovering failure...\n", i, j) + if err := f.Recover(tt.agents); err != nil { + fmt.Printf("etcd-tester: [round#%d case#%d] recovery failing...\n", i, j) + tt.cleanup(i, j) + } + fmt.Printf("etcd-tester: [round#%d case#%d] succeed!\n", i, j) + } + } +} + +func (tt *tester) cleanup(i, j int) { + fmt.Printf("etcd-tester: [round#%d case#%d] cleaning up...\n", i, j) + for _, a := range tt.agents { + a.Terminate() + a.Start() + } +}