mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00

This commit adds a new option -failure-wrapper to etcd-tester. The option takes the path of a script that is used for enabling/disabling external fault injectors. The script is called with the argument "enable" when the fault needs to be enabled (when failure.Inject() is called) and with "disable" in the opposite case (when failure.Recover() is called).
198 lines
4.8 KiB
Go
198 lines
4.8 KiB
Go
// Copyright 2015 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"math/rand"
|
|
"os/exec"
|
|
"time"
|
|
)
|
|
|
|
// failure is one kind of fault that can be injected into the testing
// cluster and later recovered from.
type failure interface {
|
|
// Inject injects the failure into the testing cluster at the given
|
|
// round. When calling the function, the cluster should be healthy.
|
|
Inject(c *cluster, round int) error
|
|
// Recover recovers the injected failure caused by the injection of the
|
|
// given round and waits for the recovery of the testing cluster.
|
|
Recover(c *cluster, round int) error
|
|
// Desc returns a description of the failure.
|
|
Desc() string
|
|
}
|
|
|
|
// description is a string type whose only job is to supply the Desc
// method; failureByFunc embeds it to pick that method up.
type description string

// Desc returns the description converted back to a plain string.
func (text description) Desc() string {
	return string(text)
}
|
|
|
|
// injectMemberFunc is the signature of a callback that receives one
// *member and reports an error; failureByFunc stores one as injectMember.
type injectMemberFunc func(*member) error
|
|
// recoverMemberFunc is the signature of a callback that receives one
// *member and reports an error; failureByFunc stores one as recoverMember.
type recoverMemberFunc func(*member) error
|
|
|
|
// failureByFunc bundles a description with a pair of per-member callbacks.
// The failureOne, failureAll, failureMajority and failureLeader types are
// built on it and decide which members the callbacks are applied to.
type failureByFunc struct {
|
|
// description provides the Desc method.
description
|
|
// injectMember is called for each member selected by Inject.
injectMember injectMemberFunc
|
|
// recoverMember is called for each member selected by Recover.
recoverMember recoverMemberFunc
|
|
}
|
|
|
|
// failureOne applies the callbacks to a single member, the one at index
// round%c.Size.
type failureOne failureByFunc
|
|
// failureAll applies the callbacks to every member in c.Members.
type failureAll failureByFunc
|
|
// failureMajority applies the callbacks to the members whose indexes are
// returned by killMap(c.Size, round).
type failureMajority failureByFunc
|
|
// failureLeader applies the callbacks to the member reported by
// c.GetLeader() at injection time.
type failureLeader struct {
|
|
failureByFunc
|
|
// idx is the member index stored by Inject so that Recover acts on the
// same member.
idx int
|
|
}
|
|
|
|
// failureDelay wraps another failure and, after that failure has been
// injected successfully, sleeps for delayDuration before returning.
type failureDelay struct {
|
|
failure
|
|
// delayDuration is how long Inject sleeps after the wrapped injection.
delayDuration time.Duration
|
|
}
|
|
|
|
// failureUntilSnapshot wraps another failure. Its Inject injects the
// wrapped failure and then polls c.Report() until more than snapshotCount
// additional requests have been reported, i.e. until a new snapshot
// should have been created.
|
|
type failureUntilSnapshot struct{ failure }
|
|
|
|
func (f *failureOne) Inject(c *cluster, round int) error {
|
|
return f.injectMember(c.Members[round%c.Size])
|
|
}
|
|
|
|
func (f *failureOne) Recover(c *cluster, round int) error {
|
|
if err := f.recoverMember(c.Members[round%c.Size]); err != nil {
|
|
return err
|
|
}
|
|
return c.WaitHealth()
|
|
}
|
|
|
|
func (f *failureAll) Inject(c *cluster, round int) error {
|
|
for _, m := range c.Members {
|
|
if err := f.injectMember(m); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *failureAll) Recover(c *cluster, round int) error {
|
|
for _, m := range c.Members {
|
|
if err := f.recoverMember(m); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return c.WaitHealth()
|
|
}
|
|
|
|
func (f *failureMajority) Inject(c *cluster, round int) error {
|
|
for i := range killMap(c.Size, round) {
|
|
if err := f.injectMember(c.Members[i]); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *failureMajority) Recover(c *cluster, round int) error {
|
|
for i := range killMap(c.Size, round) {
|
|
if err := f.recoverMember(c.Members[i]); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *failureLeader) Inject(c *cluster, round int) error {
|
|
idx, err := c.GetLeader()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
f.idx = idx
|
|
return f.injectMember(c.Members[idx])
|
|
}
|
|
|
|
func (f *failureLeader) Recover(c *cluster, round int) error {
|
|
if err := f.recoverMember(c.Members[f.idx]); err != nil {
|
|
return err
|
|
}
|
|
return c.WaitHealth()
|
|
}
|
|
|
|
func (f *failureDelay) Inject(c *cluster, round int) error {
|
|
if err := f.failure.Inject(c, round); err != nil {
|
|
return err
|
|
}
|
|
time.Sleep(f.delayDuration)
|
|
return nil
|
|
}
|
|
|
|
func (f *failureUntilSnapshot) Inject(c *cluster, round int) error {
|
|
if err := f.failure.Inject(c, round); err != nil {
|
|
return err
|
|
}
|
|
|
|
if c.Size < 3 {
|
|
return nil
|
|
}
|
|
|
|
start, _ := c.Report()
|
|
end := start
|
|
// Normal healthy cluster could accept 1000req/s at least.
|
|
// Give it 3-times time to create a new snapshot.
|
|
retry := snapshotCount / 1000 * 3
|
|
for j := 0; j < retry; j++ {
|
|
end, _ = c.Report()
|
|
// If the number of proposals committed is bigger than snapshot count,
|
|
// a new snapshot should have been created.
|
|
if end-start > snapshotCount {
|
|
return nil
|
|
}
|
|
time.Sleep(time.Second)
|
|
}
|
|
return fmt.Errorf("cluster too slow: only commit %d requests in %ds", end-start, retry)
|
|
}
|
|
|
|
func (f *failureUntilSnapshot) Desc() string {
|
|
return f.failure.Desc() + " for a long time and expect it to recover from an incoming snapshot"
|
|
}
|
|
|
|
// killMap returns a set of distinct member indexes in [0, size) that forms
// a majority (size/2 + 1) of a cluster with size members. The keys of the
// returned map are the chosen indexes; every value is true.
//
// The selection is pseudo-random but seeded with seed, so the same
// (size, seed) pair always yields the same set. failureMajority relies on
// that to select the same members in Inject and Recover for a given round.
//
// A non-positive size has no members to choose from; an empty map is
// returned instead of letting rand.Intn panic on a non-positive bound.
func killMap(size int, seed int) map[int]bool {
	if size <= 0 {
		return map[int]bool{}
	}
	majority := size/2 + 1
	m := make(map[int]bool, majority)
	r := rand.New(rand.NewSource(int64(seed)))
	// Duplicate draws just overwrite an existing key, so keep drawing until
	// enough distinct indexes have been collected.
	for len(m) < majority {
		m[r.Intn(size)] = true
	}
	return m
}
|
|
|
|
// failureNop is a failure whose Inject and Recover do nothing and always
// return nil.
type failureNop failureByFunc
|
|
|
|
// Inject does nothing and always reports success.
func (f *failureNop) Inject(c *cluster, round int) error {
	return nil
}
|
|
// Recover does nothing and always reports success.
func (f *failureNop) Recover(c *cluster, round int) error {
	return nil
}
|
|
|
|
// failureExternal delegates fault injection to an external script: Inject
// runs the script with "enable" and Recover runs it with "disable", each
// followed by the round number.
type failureExternal struct {
|
|
// NOTE(review): no method visible in this file uses the embedded failure;
// Inject, Recover and Desc are all defined on failureExternal itself.
// Presumably it is left nil - confirm against the code that builds it.
failure
|
|
|
|
// description is the text returned by Desc.
description string
|
|
// scriptPath is the command that Inject and Recover execute.
scriptPath string
|
|
}
|
|
|
|
func (f *failureExternal) Inject(c *cluster, round int) error {
|
|
return exec.Command(f.scriptPath, "enable", fmt.Sprintf("%d", round)).Run()
|
|
}
|
|
|
|
func (f *failureExternal) Recover(c *cluster, round int) error {
|
|
return exec.Command(f.scriptPath, "disable", fmt.Sprintf("%d", round)).Run()
|
|
}
|
|
|
|
// Desc returns the description stored on the failureExternal value.
func (f *failureExternal) Desc() string {
	return f.description
}
|