mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #2453 from yichengq/334
tools/etcd-tester: add kill one member tests
This commit is contained in:
commit
9a9d00b482
@ -32,6 +32,7 @@ type cluster struct {
|
||||
|
||||
Size int
|
||||
Agents []client.Agent
|
||||
Stressers []Stresser
|
||||
Names []string
|
||||
ClientURLs []string
|
||||
}
|
||||
@ -98,8 +99,19 @@ func (c *cluster) Bootstrap() error {
|
||||
}
|
||||
}
|
||||
|
||||
stressers := make([]Stresser, len(clientURLs))
|
||||
for i, u := range clientURLs {
|
||||
s := &stresser{
|
||||
Endpoint: u,
|
||||
N: 200,
|
||||
}
|
||||
go s.Stress()
|
||||
stressers[i] = s
|
||||
}
|
||||
|
||||
c.Size = size
|
||||
c.Agents = agents
|
||||
c.Stressers = stressers
|
||||
c.Names = names
|
||||
c.ClientURLs = clientURLs
|
||||
return nil
|
||||
@ -117,19 +129,35 @@ func (c *cluster) WaitHealth() error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *cluster) Report() (success, failure int) {
|
||||
for _, stress := range c.Stressers {
|
||||
s, f := stress.Report()
|
||||
success += s
|
||||
failure += f
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (c *cluster) Cleanup() error {
|
||||
var lasterr error
|
||||
for _, a := range c.Agents {
|
||||
if err := a.Cleanup(); err != nil {
|
||||
return err
|
||||
lasterr = err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
for _, s := range c.Stressers {
|
||||
s.Cancel()
|
||||
}
|
||||
return lasterr
|
||||
}
|
||||
|
||||
func (c *cluster) Terminate() {
|
||||
for _, a := range c.Agents {
|
||||
a.Terminate()
|
||||
}
|
||||
for _, s := range c.Stressers {
|
||||
s.Cancel()
|
||||
}
|
||||
}
|
||||
|
||||
// setHealthKey sets health key on all given urls.
|
||||
|
@ -14,6 +14,14 @@
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"time"
|
||||
)
|
||||
|
||||
const snapshotCount = 10000
|
||||
|
||||
type failure interface {
|
||||
// Inject injects the failure into the testing cluster at the given
|
||||
// round. When calling the function, the cluster should be healthy.
|
||||
@ -28,3 +36,139 @@ type failure interface {
|
||||
// description is a string label; the failure types in this file embed it
// to obtain a Desc method.
type description string

// Desc returns the label converted to a plain string.
func (d description) Desc() string {
	text := string(d)
	return text
}
|
||||
|
||||
type failureKillAll struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillAll() *failureKillAll {
|
||||
return &failureKillAll{
|
||||
description: "kill all members",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillAll) Inject(c *cluster, round int) error {
|
||||
for _, a := range c.Agents {
|
||||
if err := a.Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureKillAll) Recover(c *cluster, round int) error {
|
||||
for _, a := range c.Agents {
|
||||
if _, err := a.Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
type failureKillMajority struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillMajority() *failureKillMajority {
|
||||
return &failureKillMajority{
|
||||
description: "kill majority of the cluster",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillMajority) Inject(c *cluster, round int) error {
|
||||
for i := range getToKillMap(c.Size, round) {
|
||||
if err := c.Agents[i].Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureKillMajority) Recover(c *cluster, round int) error {
|
||||
for i := range getToKillMap(c.Size, round) {
|
||||
if _, err := c.Agents[i].Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
// getToKillMap picks a majority (size/2 + 1) of distinct member indexes in
// the range [0, size) and returns them as the keys of a map whose values
// are all true.
//
// The choice is driven by a random source seeded with seed, so calling the
// function again with the same size and seed yields the same set of keys.
//
// A size of zero or less has no valid indexes to draw from; in that case an
// empty map is returned instead of letting rand.Intn panic.
func getToKillMap(size int, seed int) map[int]bool {
	if size <= 0 {
		return map[int]bool{}
	}
	majority := size/2 + 1
	m := make(map[int]bool, majority)
	r := rand.New(rand.NewSource(int64(seed)))
	// Duplicate draws simply overwrite an existing key, so keep drawing
	// until enough distinct indexes have been collected.
	for len(m) < majority {
		m[r.Intn(size)] = true
	}
	return m
}
|
||||
|
||||
type failureKillOne struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillOne() *failureKillOne {
|
||||
return &failureKillOne{
|
||||
description: "kill one random member",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillOne) Inject(c *cluster, round int) error {
|
||||
i := round % c.Size
|
||||
return c.Agents[i].Stop()
|
||||
}
|
||||
|
||||
func (f *failureKillOne) Recover(c *cluster, round int) error {
|
||||
i := round % c.Size
|
||||
if _, err := c.Agents[i].Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
// failureKillOneForLongTime kills one member for long time, and restart
|
||||
// after a snapshot is required.
|
||||
type failureKillOneForLongTime struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillOneForLongTime() *failureKillOneForLongTime {
|
||||
return &failureKillOneForLongTime{
|
||||
description: "kill one member for long time and expect it to recover from incoming snapshot",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillOneForLongTime) Inject(c *cluster, round int) error {
|
||||
i := round % c.Size
|
||||
if err := c.Agents[i].Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Size >= 3 {
|
||||
start, _ := c.Report()
|
||||
var end int
|
||||
// Normal healthy cluster could accept 1000req/s at least.
|
||||
// Give it 3-times time to create a new snapshot.
|
||||
retry := snapshotCount / 1000 * 3
|
||||
for j := 0; j < retry; j++ {
|
||||
end, _ = c.Report()
|
||||
// If the number of proposals committed is bigger than snapshot count,
|
||||
// a new snapshot should have been created.
|
||||
if end-start > snapshotCount {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return fmt.Errorf("cluster too slow: only commit %d requests in %ds", end-start, retry)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureKillOneForLongTime) Recover(c *cluster, round int) error {
|
||||
i := round % c.Size
|
||||
if _, err := c.Agents[i].Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
@ -1,43 +0,0 @@
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
type failureKillAll struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillAll() *failureKillAll {
|
||||
return &failureKillAll{
|
||||
description: "kill all members",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillAll) Inject(c *cluster, round int) error {
|
||||
for _, a := range c.Agents {
|
||||
if err := a.Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureKillAll) Recover(c *cluster, round int) error {
|
||||
for _, a := range c.Agents {
|
||||
if _, err := a.Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import "math/rand"
|
||||
|
||||
type failureKillMajority struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureKillMajority() *failureKillMajority {
|
||||
return &failureKillMajority{
|
||||
description: "kill majority of the cluster",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureKillMajority) Inject(c *cluster, round int) error {
|
||||
for i := range getToKillMap(c.Size, round) {
|
||||
if err := c.Agents[i].Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureKillMajority) Recover(c *cluster, round int) error {
|
||||
for i := range getToKillMap(c.Size, round) {
|
||||
if _, err := c.Agents[i].Restart(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
func getToKillMap(size int, seed int) map[int]bool {
|
||||
m := make(map[int]bool)
|
||||
r := rand.New(rand.NewSource(int64(seed)))
|
||||
majority := size/2 + 1
|
||||
for {
|
||||
m[r.Intn(size)] = true
|
||||
if len(m) >= majority {
|
||||
return m
|
||||
}
|
||||
}
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
type failureBase struct {
|
||||
description
|
||||
}
|
||||
|
||||
func newFailureBase() *failureBase {
|
||||
return &failureBase{
|
||||
description: "do nothing",
|
||||
}
|
||||
}
|
||||
|
||||
func (f *failureBase) Inject(c *cluster, round int) error { return nil }
|
||||
|
||||
func (f *failureBase) Recover(c *cluster, round int) error { return nil }
|
@ -33,24 +33,15 @@ func main() {
|
||||
}
|
||||
defer c.Terminate()
|
||||
|
||||
stressers := make([]Stresser, len(c.ClientURLs))
|
||||
for i, u := range c.ClientURLs {
|
||||
s := &stresser{
|
||||
Endpoint: u,
|
||||
N: 200,
|
||||
}
|
||||
go s.Stress()
|
||||
stressers[i] = s
|
||||
}
|
||||
|
||||
t := &tester{
|
||||
failures: []failure{newFailureBase(), newFailureKillAll(), newFailureKillMajority()},
|
||||
cluster: c,
|
||||
limit: *limit,
|
||||
failures: []failure{
|
||||
newFailureKillAll(),
|
||||
newFailureKillMajority(),
|
||||
newFailureKillOne(),
|
||||
newFailureKillOneForLongTime(),
|
||||
},
|
||||
cluster: c,
|
||||
limit: *limit,
|
||||
}
|
||||
t.runLoop()
|
||||
|
||||
for _, s := range stressers {
|
||||
s.Cancel()
|
||||
}
|
||||
}
|
||||
|
@ -65,8 +65,9 @@ func (s *stresser) Stress() error {
|
||||
s.mu.Lock()
|
||||
if err != nil {
|
||||
s.failure++
|
||||
} else {
|
||||
s.success++
|
||||
}
|
||||
s.success++
|
||||
s.mu.Unlock()
|
||||
}
|
||||
}()
|
||||
|
Loading…
x
Reference in New Issue
Block a user