Merge pull request #2600 from yichengq/failure-isolate

tools/functional-tester: add isolate failures
This commit is contained in:
Yicheng Qin
2015-03-29 22:43:51 -07:00
9 changed files with 163 additions and 9 deletions

View File

@@ -22,6 +22,7 @@ import (
"path"
"time"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
@@ -135,6 +136,14 @@ func (a *Agent) terminate() error {
return nil
}
// dropPort forwards the request to netutil.DropPort for the given port and
// hands back whatever error that call reports.
func (a *Agent) dropPort(port int) error {
	if err := netutil.DropPort(port); err != nil {
		return err
	}
	return nil
}
// recoverPort forwards the request to netutil.RecoverPort for the given port
// and hands back whatever error that call reports.
func (a *Agent) recoverPort(port int) error {
	if err := netutil.RecoverPort(port); err != nil {
		return err
	}
	return nil
}
// status builds a client.Status whose State field mirrors the agent's
// current state.
func (a *Agent) status() client.Status {
	var st client.Status
	st.State = a.state
	return st
}

View File

@@ -34,8 +34,10 @@ type Agent interface {
Cleanup() error
// Terminate stops the existing etcd the agent started and removes its data dir.
Terminate() error
// Isolate isolates the network of etcd
Isolate() error
// DropPort drops all network packets at the given port.
DropPort(port int) error
// RecoverPort stops dropping all network packets at the given port.
RecoverPort(port int) error
// Status returns the status of etcd on the agent
Status() (Status, error)
}
@@ -83,8 +85,12 @@ func (a *agent) Terminate() error {
return a.rpcClient.Call("Agent.RPCTerminate", struct{}{}, nil)
}
func (a *agent) Isolate() error {
panic("not implemented")
// DropPort invokes the remote "Agent.RPCDropPort" method with the given port.
// No reply value is requested; only the error from the RPC call is returned.
func (a *agent) DropPort(port int) error {
	const method = "Agent.RPCDropPort"
	return a.rpcClient.Call(method, port, nil)
}
// RecoverPort invokes the remote "Agent.RPCRecoverPort" method with the given
// port. No reply value is requested; only the error from the RPC call is
// returned.
func (a *agent) RecoverPort(port int) error {
	const method = "Agent.RPCRecoverPort"
	return a.rpcClient.Call(method, port, nil)
}
func (a *agent) Status() (Status, error) {

View File

@@ -84,8 +84,22 @@ func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
return nil
}
func (a *Agent) RPCIsolate(args struct{}, reply *struct{}) error {
panic("not implemented")
// RPCDropPort is the RPC entry point for dropping packets at the given port.
// It logs the request and delegates to dropPort. The reply argument is unused.
//
// A failure is logged on the agent side and also returned, so the remote
// caller learns that the port was NOT dropped instead of being told the
// call succeeded.
func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
	log.Printf("rpc: drop port %d", port)
	if err := a.dropPort(port); err != nil {
		log.Println("rpc: error dropping port", err)
		return err
	}
	return nil
}
// RPCRecoverPort is the RPC entry point for recovering the given port.
// It logs the request and delegates to recoverPort. The reply argument is
// unused.
//
// A failure is logged on the agent side and also returned, so the remote
// caller learns that the port was NOT recovered instead of being told the
// call succeeded.
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
	log.Printf("rpc: recover port %d", port)
	if err := a.recoverPort(port); err != nil {
		log.Println("rpc: error recovering port", err)
		return err
	}
	return nil
}
func (a *Agent) RPCStatus(args struct{}, status *client.Status) error {

View File

@@ -27,6 +27,8 @@ import (
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
// peerURLPort is the port number used when building each member's peer URL.
const peerURLPort = 2380
type cluster struct {
agentEndpoints []string
datadir string
@@ -76,7 +78,7 @@ func (c *cluster) Bootstrap() error {
return err
}
clientURLs[i] = fmt.Sprintf("http://%s:2379", host)
peerURLs[i] = fmt.Sprintf("http://%s:2380", host)
peerURLs[i] = fmt.Sprintf("http://%s:%d", host, peerURLPort)
members[i] = fmt.Sprintf("%s=%s", names[i], peerURLs[i])
}
@@ -196,8 +198,10 @@ func setHealthKey(us []string) error {
if err != nil {
return err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
kapi := etcdclient.NewKeysAPI(c)
_, err = kapi.Set(context.TODO(), "health", "good", nil)
_, err = kapi.Set(ctx, "health", "good", nil)
cancel()
if err != nil {
return err
}

View File

@@ -172,3 +172,57 @@ func (f *failureKillOneForLongTime) Recover(c *cluster, round int) error {
}
return c.WaitHealth()
}
// failureIsolate is a failure case that carries only an embedded description.
type failureIsolate struct {
	description
}
// newFailureIsolate returns a failureIsolate described as
// "isolate one member".
func newFailureIsolate() *failureIsolate {
	f := new(failureIsolate)
	f.description = "isolate one member"
	return f
}
// Inject picks the agent at index round modulo the cluster size and asks it
// to drop traffic on peerURLPort. Any error from the agent is returned as is.
func (f *failureIsolate) Inject(c *cluster, round int) error {
	target := c.Agents[round%c.Size]
	return target.DropPort(peerURLPort)
}
// Recover picks the agent at index round modulo the cluster size (the same
// selection Inject uses), asks it to recover peerURLPort, and, if that
// succeeds, returns the result of c.WaitHealth.
func (f *failureIsolate) Recover(c *cluster, round int) error {
	target := c.Agents[round%c.Size]
	err := target.RecoverPort(peerURLPort)
	if err == nil {
		err = c.WaitHealth()
	}
	return err
}
// failureIsolateAll is a failure case that carries only an embedded
// description.
type failureIsolateAll struct {
	description
}
// newFailureIsolateAll returns a failureIsolateAll described as
// "isolate all members".
func newFailureIsolateAll() *failureIsolateAll {
	f := new(failureIsolateAll)
	f.description = "isolate all members"
	return f
}
// Inject asks every agent in the cluster, in order, to drop traffic on
// peerURLPort. It stops at the first agent that reports an error and returns
// that error; the round argument is not used.
func (f *failureIsolateAll) Inject(c *cluster, round int) error {
	for idx := range c.Agents {
		err := c.Agents[idx].DropPort(peerURLPort)
		if err != nil {
			return err
		}
	}
	return nil
}
// Recover asks every agent in the cluster, in order, to recover peerURLPort.
// It stops at the first agent that reports an error and returns that error.
// Once all agents succeed it returns the result of c.WaitHealth. The round
// argument is not used.
func (f *failureIsolateAll) Recover(c *cluster, round int) error {
	for idx := range c.Agents {
		err := c.Agents[idx].RecoverPort(peerURLPort)
		if err != nil {
			return err
		}
	}
	return c.WaitHealth()
}

View File

@@ -40,6 +40,8 @@ func main() {
newFailureKillMajority(),
newFailureKillOne(),
newFailureKillOneForLongTime(),
newFailureIsolate(),
newFailureIsolateAll(),
},
cluster: c,
limit: *limit,

View File

@@ -61,8 +61,10 @@ func (s *stresser) Stress() error {
for i := 0; i < s.N; i++ {
go func() {
for {
setctx, setcancel := context.WithTimeout(ctx, time.Second)
key := fmt.Sprintf("foo%d", rand.Intn(s.KeySuffixRange))
_, err := kv.Set(ctx, key, randStr(s.KeySize), nil)
_, err := kv.Set(setctx, key, randStr(s.KeySize), nil)
setcancel()
if err == context.Canceled {
return
}