diff --git a/tools/functional-tester/etcd-tester/cluster.go b/tools/functional-tester/etcd-tester/cluster.go index 25b851666..876d57e7c 100644 --- a/tools/functional-tester/etcd-tester/cluster.go +++ b/tools/functional-tester/etcd-tester/cluster.go @@ -32,6 +32,7 @@ type cluster struct { Size int Agents []client.Agent + Stressers []Stresser Names []string ClientURLs []string } @@ -98,8 +99,19 @@ func (c *cluster) Bootstrap() error { } } + stressers := make([]Stresser, len(clientURLs)) + for i, u := range clientURLs { + s := &stresser{ + Endpoint: u, + N: 200, + } + go s.Stress() + stressers[i] = s + } + c.Size = size c.Agents = agents + c.Stressers = stressers c.Names = names c.ClientURLs = clientURLs return nil @@ -117,19 +129,35 @@ func (c *cluster) WaitHealth() error { return err } +func (c *cluster) Report() (success, failure int) { + for _, stress := range c.Stressers { + s, f := stress.Report() + success += s + failure += f + } + return +} + func (c *cluster) Cleanup() error { + var lasterr error for _, a := range c.Agents { if err := a.Cleanup(); err != nil { - return err + lasterr = err } } - return nil + for _, s := range c.Stressers { + s.Cancel() + } + return lasterr } func (c *cluster) Terminate() { for _, a := range c.Agents { a.Terminate() } + for _, s := range c.Stressers { + s.Cancel() + } } // setHealthKey sets health key on all given urls. diff --git a/tools/functional-tester/etcd-tester/failure.go b/tools/functional-tester/etcd-tester/failure.go index dbfa5560c..79efe4c1b 100644 --- a/tools/functional-tester/etcd-tester/failure.go +++ b/tools/functional-tester/etcd-tester/failure.go @@ -14,6 +14,14 @@ package main +import ( + "fmt" + "math/rand" + "time" +) + +const snapshotCount = 10000 + type failure interface { // Inject injeccts the failure into the testing cluster at the given // round. When calling the function, the cluster should be in health. 
@@ -28,3 +36,139 @@ type failure interface { type description string func (d description) Desc() string { return string(d) } + +type failureKillAll struct { + description +} + +func newFailureKillAll() *failureKillAll { + return &failureKillAll{ + description: "kill all members", + } +} + +func (f *failureKillAll) Inject(c *cluster, round int) error { + for _, a := range c.Agents { + if err := a.Stop(); err != nil { + return err + } + } + return nil +} + +func (f *failureKillAll) Recover(c *cluster, round int) error { + for _, a := range c.Agents { + if _, err := a.Restart(); err != nil { + return err + } + } + return c.WaitHealth() +} + +type failureKillMajority struct { + description +} + +func newFailureKillMajority() *failureKillMajority { + return &failureKillMajority{ + description: "kill majority of the cluster", + } +} + +func (f *failureKillMajority) Inject(c *cluster, round int) error { + for i := range getToKillMap(c.Size, round) { + if err := c.Agents[i].Stop(); err != nil { + return err + } + } + return nil +} + +func (f *failureKillMajority) Recover(c *cluster, round int) error { + for i := range getToKillMap(c.Size, round) { + if _, err := c.Agents[i].Restart(); err != nil { + return err + } + } + return c.WaitHealth() +} + +func getToKillMap(size int, seed int) map[int]bool { + m := make(map[int]bool) + r := rand.New(rand.NewSource(int64(seed))) + majority := size/2 + 1 + for { + m[r.Intn(size)] = true + if len(m) >= majority { + return m + } + } +} + +type failureKillOne struct { + description +} + +func newFailureKillOne() *failureKillOne { + return &failureKillOne{ + description: "kill one random member", + } +} + +func (f *failureKillOne) Inject(c *cluster, round int) error { + i := round % c.Size + return c.Agents[i].Stop() +} + +func (f *failureKillOne) Recover(c *cluster, round int) error { + i := round % c.Size + if _, err := c.Agents[i].Restart(); err != nil { + return err + } + return c.WaitHealth() +} + +// failureKillOneForLongTime kills 
one member for a long time, and restarts it +// after a snapshot is required. +type failureKillOneForLongTime struct { + description +} + +func newFailureKillOneForLongTime() *failureKillOneForLongTime { + return &failureKillOneForLongTime{ + description: "kill one member for long time and expect it to recover from incoming snapshot", + } +} + +func (f *failureKillOneForLongTime) Inject(c *cluster, round int) error { + i := round % c.Size + if err := c.Agents[i].Stop(); err != nil { + return err + } + if c.Size >= 3 { + start, _ := c.Report() + var end int + // A normal healthy cluster can accept at least 1000 req/s. + // Give it three times that long to create a new snapshot. + retry := snapshotCount / 1000 * 3 + for j := 0; j < retry; j++ { + end, _ = c.Report() + // If the number of proposals committed exceeds the snapshot count, + // a new snapshot should have been created. + if end-start > snapshotCount { + return nil + } + time.Sleep(time.Second) + } + return fmt.Errorf("cluster too slow: only commit %d requests in %ds", end-start, retry) + } + return nil +} + +func (f *failureKillOneForLongTime) Recover(c *cluster, round int) error { + i := round % c.Size + if _, err := c.Agents[i].Restart(); err != nil { + return err + } + return c.WaitHealth() +} diff --git a/tools/functional-tester/etcd-tester/failure_killall.go b/tools/functional-tester/etcd-tester/failure_killall.go deleted file mode 100644 index 9a9cbb7d8..000000000 --- a/tools/functional-tester/etcd-tester/failure_killall.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2015 CoreOS, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -type failureKillAll struct { - description -} - -func newFailureKillAll() *failureKillAll { - return &failureKillAll{ - description: "kill all members", - } -} - -func (f *failureKillAll) Inject(c *cluster, round int) error { - for _, a := range c.Agents { - if err := a.Stop(); err != nil { - return err - } - } - return nil -} - -func (f *failureKillAll) Recover(c *cluster, round int) error { - for _, a := range c.Agents { - if _, err := a.Restart(); err != nil { - return err - } - } - return c.WaitHealth() -} diff --git a/tools/functional-tester/etcd-tester/failure_killmaj.go b/tools/functional-tester/etcd-tester/failure_killmaj.go deleted file mode 100644 index 8ccd566e9..000000000 --- a/tools/functional-tester/etcd-tester/failure_killmaj.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 CoreOS, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import "math/rand" - -type failureKillMajority struct { - description -} - -func newFailureKillMajority() *failureKillMajority { - return &failureKillMajority{ - description: "kill majority of the cluster", - } -} - -func (f *failureKillMajority) Inject(c *cluster, round int) error { - for i := range getToKillMap(c.Size, round) { - if err := c.Agents[i].Stop(); err != nil { - return err - } - } - return nil -} - -func (f *failureKillMajority) Recover(c *cluster, round int) error { - for i := range getToKillMap(c.Size, round) { - if _, err := c.Agents[i].Restart(); err != nil { - return err - } - } - return c.WaitHealth() -} - -func getToKillMap(size int, seed int) map[int]bool { - m := make(map[int]bool) - r := rand.New(rand.NewSource(int64(seed))) - majority := size/2 + 1 - for { - m[r.Intn(size)] = true - if len(m) >= majority { - return m - } - } -} diff --git a/tools/functional-tester/etcd-tester/failure_no.go b/tools/functional-tester/etcd-tester/failure_no.go deleted file mode 100644 index 999880f4d..000000000 --- a/tools/functional-tester/etcd-tester/failure_no.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2015 CoreOS, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -type failureBase struct { - description -} - -func newFailureBase() *failureBase { - return &failureBase{ - description: "do nothing", - } -} - -func (f *failureBase) Inject(c *cluster, round int) error { return nil } - -func (f *failureBase) Recover(c *cluster, round int) error { return nil } diff --git a/tools/functional-tester/etcd-tester/main.go b/tools/functional-tester/etcd-tester/main.go index 6360c6b87..6c8fe5dac 100644 --- a/tools/functional-tester/etcd-tester/main.go +++ b/tools/functional-tester/etcd-tester/main.go @@ -33,24 +33,15 @@ func main() { } defer c.Terminate() - stressers := make([]Stresser, len(c.ClientURLs)) - for i, u := range c.ClientURLs { - s := &stresser{ - Endpoint: u, - N: 200, - } - go s.Stress() - stressers[i] = s - } - t := &tester{ - failures: []failure{newFailureBase(), newFailureKillAll(), newFailureKillMajority()}, - cluster: c, - limit: *limit, + failures: []failure{ + newFailureKillAll(), + newFailureKillMajority(), + newFailureKillOne(), + newFailureKillOneForLongTime(), + }, + cluster: c, + limit: *limit, } t.runLoop() - - for _, s := range stressers { - s.Cancel() - } } diff --git a/tools/functional-tester/etcd-tester/stresser.go b/tools/functional-tester/etcd-tester/stresser.go index 680d47337..280c1e18f 100644 --- a/tools/functional-tester/etcd-tester/stresser.go +++ b/tools/functional-tester/etcd-tester/stresser.go @@ -65,8 +65,9 @@ func (s *stresser) Stress() error { s.mu.Lock() if err != nil { s.failure++ + } else { + s.success++ } - s.success++ s.mu.Unlock() } }()