From 267313a3f8da50a943f83eb36860be85c75fb8f0 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 5 Mar 2015 15:13:35 -0800 Subject: [PATCH] tools/etcd-tester: add kill majority test --- .../functional-tester/etcd-tester/failure.go | 11 ++-- .../etcd-tester/failure_killall.go | 4 +- .../etcd-tester/failure_killmaj.go | 57 +++++++++++++++++++ .../etcd-tester/failure_no.go | 4 +- tools/functional-tester/etcd-tester/main.go | 2 +- tools/functional-tester/etcd-tester/tester.go | 4 +- 6 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 tools/functional-tester/etcd-tester/failure_killmaj.go diff --git a/tools/functional-tester/etcd-tester/failure.go b/tools/functional-tester/etcd-tester/failure.go index a4881e70a..dbfa5560c 100644 --- a/tools/functional-tester/etcd-tester/failure.go +++ b/tools/functional-tester/etcd-tester/failure.go @@ -15,11 +15,12 @@ package main type failure interface { - // inject the failure into the testing cluster - Inject(c *cluster) error - // recover the injected failure and wait for the - // recovery of the testing cluster - Recover(c *cluster) error + // Inject injects the failure into the testing cluster at the given + // round. When calling the function, the cluster should be healthy. + Inject(c *cluster, round int) error + // Recover recovers the injected failure caused by the injection of the + // given round and waits for the recovery of the testing cluster. 
+ Recover(c *cluster, round int) error // return a description of the failure Desc() string } diff --git a/tools/functional-tester/etcd-tester/failure_killall.go b/tools/functional-tester/etcd-tester/failure_killall.go index a767937e6..9a9cbb7d8 100644 --- a/tools/functional-tester/etcd-tester/failure_killall.go +++ b/tools/functional-tester/etcd-tester/failure_killall.go @@ -24,7 +24,7 @@ func newFailureKillAll() *failureKillAll { } } -func (f *failureKillAll) Inject(c *cluster) error { +func (f *failureKillAll) Inject(c *cluster, round int) error { for _, a := range c.Agents { if err := a.Stop(); err != nil { return err @@ -33,7 +33,7 @@ func (f *failureKillAll) Inject(c *cluster) error { return nil } -func (f *failureKillAll) Recover(c *cluster) error { +func (f *failureKillAll) Recover(c *cluster, round int) error { for _, a := range c.Agents { if _, err := a.Restart(); err != nil { return err diff --git a/tools/functional-tester/etcd-tester/failure_killmaj.go b/tools/functional-tester/etcd-tester/failure_killmaj.go new file mode 100644 index 000000000..8ccd566e9 --- /dev/null +++ b/tools/functional-tester/etcd-tester/failure_killmaj.go @@ -0,0 +1,57 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import "math/rand" + +type failureKillMajority struct { + description +} + +func newFailureKillMajority() *failureKillMajority { + return &failureKillMajority{ + description: "kill majority of the cluster", + } +} + +func (f *failureKillMajority) Inject(c *cluster, round int) error { + for i := range getToKillMap(c.Size, round) { + if err := c.Agents[i].Stop(); err != nil { + return err + } + } + return nil +} + +func (f *failureKillMajority) Recover(c *cluster, round int) error { + for i := range getToKillMap(c.Size, round) { + if _, err := c.Agents[i].Restart(); err != nil { + return err + } + } + return c.WaitHealth() +} + +func getToKillMap(size int, seed int) map[int]bool { + m := make(map[int]bool) + r := rand.New(rand.NewSource(int64(seed))) + majority := size/2 + 1 + for { + m[r.Intn(size)] = true + if len(m) >= majority { + return m + } + } +} diff --git a/tools/functional-tester/etcd-tester/failure_no.go b/tools/functional-tester/etcd-tester/failure_no.go index abd1a89ed..999880f4d 100644 --- a/tools/functional-tester/etcd-tester/failure_no.go +++ b/tools/functional-tester/etcd-tester/failure_no.go @@ -24,6 +24,6 @@ func newFailureBase() *failureBase { } } -func (f *failureBase) Inject(c *cluster) error { return nil } +func (f *failureBase) Inject(c *cluster, round int) error { return nil } -func (f *failureBase) Recover(c *cluster) error { return nil } +func (f *failureBase) Recover(c *cluster, round int) error { return nil } diff --git a/tools/functional-tester/etcd-tester/main.go b/tools/functional-tester/etcd-tester/main.go index f0854de46..6360c6b87 100644 --- a/tools/functional-tester/etcd-tester/main.go +++ b/tools/functional-tester/etcd-tester/main.go @@ -44,7 +44,7 @@ func main() { } t := &tester{ - failures: []failure{newFailureBase(), newFailureKillAll()}, + failures: []failure{newFailureBase(), newFailureKillAll(), newFailureKillMajority()}, cluster: c, limit: *limit, } diff --git 
a/tools/functional-tester/etcd-tester/tester.go b/tools/functional-tester/etcd-tester/tester.go index a23cbe9e6..75423d30a 100644 --- a/tools/functional-tester/etcd-tester/tester.go +++ b/tools/functional-tester/etcd-tester/tester.go @@ -35,7 +35,7 @@ func (tt *tester) runLoop() { } log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc()) log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j) - if err := f.Inject(tt.cluster); err != nil { + if err := f.Inject(tt.cluster, i); err != nil { log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err) if err := tt.cleanup(i, j); err != nil { log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) @@ -44,7 +44,7 @@ func (tt *tester) runLoop() { continue } log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j) - if err := f.Recover(tt.cluster); err != nil { + if err := f.Recover(tt.cluster, i); err != nil { log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err) if err := tt.cleanup(i, j); err != nil { log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)