etcd-runner: integrate etcd runner into etcd tester

etcd tester runs etcd runner as a separate binary.
It sends SIGSTOP to the runner when the tester wants to stop stressing.
It sends SIGCONT to the runner when the tester wants to start stressing.
When the tester needs to clean up, it sends SIGINT to the runner.

FIXES #7026
This commit is contained in:
fanmin shi 2017-04-25 11:36:54 -07:00
parent 2af1605db3
commit a9e04061b1
9 changed files with 214 additions and 20 deletions

View File

@ -7,4 +7,5 @@ fi
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-agent ./cmd/tools/functional-tester/etcd-agent CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-agent ./cmd/tools/functional-tester/etcd-agent
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-tester ./cmd/tools/functional-tester/etcd-tester CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-tester ./cmd/tools/functional-tester/etcd-tester
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-runner ./cmd/tools/functional-tester/etcd-runner

View File

@ -245,6 +245,19 @@ func (cchecker *compositeChecker) Check() error {
return errsToError(errs) return errsToError(errs)
} }
// runnerChecker is a Checker that surfaces errors delivered on errc.
type runnerChecker struct {
	errc chan error
}

// Check polls errc without blocking. It returns the pending error when one is
// waiting on the channel and nil when the channel is currently empty.
func (rc *runnerChecker) Check() error {
	var pending error
	select {
	case pending = <-rc.errc:
		// An error was queued; hand it to the caller.
	default:
		// Nothing queued; pending stays nil.
	}
	return pending
}
type noChecker struct{} type noChecker struct{}
func newNoChecker() Checker { return &noChecker{} } func newNoChecker() Checker { return &noChecker{} }

View File

@ -0,0 +1,97 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"io/ioutil"
"os/exec"
"syscall"
"golang.org/x/time/rate"
)
// runnerStresser is a stresser that drives an external runner command and
// controls it with signals.
type runnerStresser struct {
// cmd is the runner process handle; it is nil until setupOnce creates it.
cmd *exec.Cmd
// cmdStr is the command handed to exec.Command.
cmdStr string
// args are the arguments handed to exec.Command.
args []string
// rl is the rate limiter whose limit is lowered by reqRate on construction
// and raised by the same amount in Close.
rl *rate.Limiter
// reqRate is the request rate reserved from rl for this runner.
reqRate int
// errc (buffered, capacity 1) receives the error reported once the
// runner's stderr has been read to the end.
errc chan error
// donec is closed when the stderr-reading goroutine finishes.
donec chan struct{}
}
// newRunnerStresser builds a runnerStresser for the command cmdStr with the
// given args. It immediately lowers rl's limit by reqRate, reserving that
// share of the request budget for the runner; Close gives it back.
// The runner process itself is not started here.
func newRunnerStresser(cmdStr string, args []string, rl *rate.Limiter, reqRate int) *runnerStresser {
	reserved := rate.Limit(reqRate)
	rl.SetLimit(rl.Limit() - reserved)

	rs := new(runnerStresser)
	rs.cmdStr = cmdStr
	rs.args = args
	rs.rl = rl
	rs.reqRate = reqRate
	// errc is buffered so the stderr reader can report without a receiver.
	rs.errc = make(chan error, 1)
	rs.donec = make(chan struct{})
	return rs
}
// setupOnce starts the runner process the first time it is called and is a
// no-op once the process is running.
//
// rs.cmd is only published after the process has started successfully, so a
// failed attempt never leaves behind a command with a nil Process and a later
// call can retry from scratch. The stderr reader is likewise started only
// after a successful Start, which guarantees donec is closed at most once.
//
// The reader goroutine drains the runner's stderr until EOF and then reports
// on errc: the read error if reading failed, otherwise an error carrying the
// command, its arguments and everything that was written to stderr.
//
// It returns any error from creating the stderr pipe or starting the process.
func (rs *runnerStresser) setupOnce() (err error) {
	if rs.cmd != nil {
		return nil
	}

	cmd := exec.Command(rs.cmdStr, rs.args...)
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return err
	}
	if err = cmd.Start(); err != nil {
		// Start releases the pipe descriptors itself when it fails.
		return err
	}
	rs.cmd = cmd

	go func() {
		defer close(rs.donec)
		out, rerr := ioutil.ReadAll(stderr)
		if rerr != nil {
			rs.errc <- rerr
			return
		}
		rs.errc <- fmt.Errorf("(%v %v) stderr %v", rs.cmdStr, rs.args, string(out))
	}()
	return nil
}
// Stress makes sure the runner process has been set up and then sends it
// SIGCONT. It returns the setup error if setup fails, otherwise the result of
// delivering the signal.
func (rs *runnerStresser) Stress() (err error) {
	err = rs.setupOnce()
	if err == nil {
		err = syscall.Kill(rs.cmd.Process.Pid, syscall.SIGCONT)
	}
	return err
}
// Pause suspends the runner process by sending it SIGSTOP.
//
// It is a no-op when the runner has not been started (rs.cmd or its Process
// is nil), instead of dereferencing a nil pointer.
func (rs *runnerStresser) Pause() {
	if rs.cmd == nil || rs.cmd.Process == nil {
		return
	}
	// Best effort: Pause has no error return, so a failed signal is ignored.
	_ = syscall.Kill(rs.cmd.Process.Pid, syscall.SIGSTOP)
}
// Close interrupts the runner process, waits for it to go away and returns
// the request rate reserved at construction to the shared rate limiter.
//
// When the runner was never started (rs.cmd or its Process is nil) there is
// nothing to signal or wait for, and only the rate limit is restored.
func (rs *runnerStresser) Close() {
	if rs.cmd != nil && rs.cmd.Process != nil {
		pid := rs.cmd.Process.Pid
		// Signal errors are ignored on purpose: Close is best effort and has
		// no error return.
		_ = syscall.Kill(pid, syscall.SIGINT)
		// A process stopped by SIGSTOP keeps SIGINT pending until it is
		// continued, so wake it up or the waits below would never return.
		_ = syscall.Kill(pid, syscall.SIGCONT)
		// Let the stderr reader finish before calling Wait: Wait closes the
		// pipe, and calling it before all reads complete is incorrect.
		<-rs.donec
		// Reap the process; an interrupted runner reports a non-nil error
		// here, which is expected and ignored.
		_ = rs.cmd.Wait()
	}
	rs.rl.SetLimit(rs.rl.Limit() + rate.Limit(rs.reqRate))
}
// ModifiedKeys always reports 1.
// NOTE(review): presumably a placeholder, since this type tracks no key
// count of its own; confirm what callers expect from the value.
func (rs *runnerStresser) ModifiedKeys() int64 {
return 1
}
// Checker returns a runnerChecker that reads from this stresser's errc, so
// errors reported by the runner's stderr reader surface through Check.
func (rs *runnerStresser) Checker() Checker {
	rc := &runnerChecker{errc: rs.errc}
	return rc
}

View File

@ -140,11 +140,16 @@ func (s *keyStresser) run(ctx context.Context) {
} }
} }
func (s *keyStresser) Cancel() { func (s *keyStresser) Pause() {
s.Close()
}
func (s *keyStresser) Close() {
s.cancel() s.cancel()
s.conn.Close() s.conn.Close()
s.wg.Wait() s.wg.Wait()
plog.Infof("keyStresser %q is canceled", s.Endpoint) plog.Infof("keyStresser %q is closed", s.Endpoint)
} }
func (s *keyStresser) ModifiedKeys() int64 { func (s *keyStresser) ModifiedKeys() int64 {

View File

@ -361,13 +361,17 @@ func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
return false, ls.ctx.Err() return false, ls.ctx.Err()
} }
func (ls *leaseStresser) Cancel() { func (ls *leaseStresser) Pause() {
plog.Debugf("lease stresser %q is canceling...", ls.endpoint) ls.Close()
}
func (ls *leaseStresser) Close() {
plog.Debugf("lease stresser %q is closing...", ls.endpoint)
ls.cancel() ls.cancel()
ls.runWg.Wait() ls.runWg.Wait()
ls.aliveWg.Wait() ls.aliveWg.Wait()
ls.conn.Close() ls.conn.Close()
plog.Infof("lease stresser %q is canceled", ls.endpoint) plog.Infof("lease stresser %q is closed", ls.endpoint)
} }
func (ls *leaseStresser) ModifiedKeys() int64 { func (ls *leaseStresser) ModifiedKeys() int64 {

View File

@ -51,7 +51,8 @@ func main() {
stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.") stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.")
schedCases := flag.String("schedule-cases", "", "test case schedule") schedCases := flag.String("schedule-cases", "", "test case schedule")
consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)") consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop).") stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop, election-runner, watch-runner, lock-racer-runner, lease-runner).")
etcdRunnerPath := flag.String("etcd-runner", "", "specify a path of etcd runner binary")
failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").") failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").")
externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector") externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector")
enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof") enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof")
@ -120,6 +121,8 @@ func main() {
keySuffixRange: int(*stressKeySuffixRange), keySuffixRange: int(*stressKeySuffixRange),
numLeases: 10, numLeases: 10,
keysPerLease: 10, keysPerLease: 10,
etcdRunnerPath: *etcdRunnerPath,
} }
t := &tester{ t := &tester{

View File

@ -15,6 +15,7 @@
package main package main
import ( import (
"fmt"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -28,8 +29,10 @@ func init() { grpclog.SetLogger(plog) }
type Stresser interface { type Stresser interface {
// Stress starts to stress the etcd cluster // Stress starts to stress the etcd cluster
Stress() error Stress() error
// Cancel cancels the stress test on the etcd cluster // Pause stops the stresser from sending requests to etcd. Resume by calling Stress.
Cancel() Pause()
// Close releases all of the Stresser's resources.
Close()
// ModifiedKeys reports the number of keys created and deleted by stresser // ModifiedKeys reports the number of keys created and deleted by stresser
ModifiedKeys() int64 ModifiedKeys() int64
// Checker returns an invariant checker for after the stresser is canceled. // Checker returns an invariant checker for after the stresser is canceled.
@ -43,7 +46,8 @@ type nopStresser struct {
} }
func (s *nopStresser) Stress() error { return nil } func (s *nopStresser) Stress() error { return nil }
func (s *nopStresser) Cancel() {} func (s *nopStresser) Pause() {}
// Close does nothing.
func (s *nopStresser) Close() {}
func (s *nopStresser) ModifiedKeys() int64 { func (s *nopStresser) ModifiedKeys() int64 {
return 0 return 0
} }
@ -59,7 +63,7 @@ func (cs *compositeStresser) Stress() error {
for i, s := range cs.stressers { for i, s := range cs.stressers {
if err := s.Stress(); err != nil { if err := s.Stress(); err != nil {
for j := 0; j < i; j++ { for j := 0; j < i; j++ {
cs.stressers[i].Cancel() cs.stressers[i].Close()
} }
return err return err
} }
@ -67,13 +71,25 @@ func (cs *compositeStresser) Stress() error {
return nil return nil
} }
func (cs *compositeStresser) Cancel() { func (cs *compositeStresser) Pause() {
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(cs.stressers)) wg.Add(len(cs.stressers))
for i := range cs.stressers { for i := range cs.stressers {
go func(s Stresser) { go func(s Stresser) {
defer wg.Done() defer wg.Done()
s.Cancel() s.Pause()
}(cs.stressers[i])
}
wg.Wait()
}
func (cs *compositeStresser) Close() {
var wg sync.WaitGroup
wg.Add(len(cs.stressers))
for i := range cs.stressers {
go func(s Stresser) {
defer wg.Done()
s.Close()
}(cs.stressers[i]) }(cs.stressers[i])
} }
wg.Wait() wg.Wait()
@ -108,6 +124,8 @@ type stressConfig struct {
keysPerLease int keysPerLease int
rateLimiter *rate.Limiter rateLimiter *rate.Limiter
etcdRunnerPath string
} }
// NewStresser creates stresser from a comma separated list of stresser types. // NewStresser creates stresser from a comma separated list of stresser types.
@ -149,6 +167,49 @@ func NewStresser(s string, sc *stressConfig, m *member) Stresser {
keysPerLease: sc.keysPerLease, keysPerLease: sc.keysPerLease,
rateLimiter: sc.rateLimiter, rateLimiter: sc.rateLimiter,
} }
case "election-runner":
reqRate := 100
args := []string{
"election",
fmt.Sprintf("%v", time.Now().UnixNano()), // election name as current nano time
"--dial-timeout=10s",
"--endpoints", m.grpcAddr(),
"--total-client-connections=10",
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "watch-runner":
reqRate := 100
args := []string{
"watcher",
"--prefix", fmt.Sprintf("%v", time.Now().UnixNano()), // prefix all keys with nano time
"--total-keys=1",
"--total-prefixes=1",
"--watch-per-prefix=1",
"--endpoints", m.grpcAddr(),
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "lock-racer-runner":
reqRate := 100
args := []string{
"lock-racer",
fmt.Sprintf("%v", time.Now().UnixNano()), // locker name as current nano time
"--endpoints", m.grpcAddr(),
"--total-client-connections=10",
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "lease-runner":
args := []string{
"lease-renewer",
"--ttl=30",
"--endpoints", m.grpcAddr(),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, 0)
default: default:
plog.Panicf("unknown stresser type: %s\n", s) plog.Panicf("unknown stresser type: %s\n", s)
} }

View File

@ -114,7 +114,7 @@ func (tt *tester) doRound(round int) error {
return fmt.Errorf("recovery error: %v", err) return fmt.Errorf("recovery error: %v", err)
} }
plog.Infof("%s recovered failure", tt.logPrefix()) plog.Infof("%s recovered failure", tt.logPrefix())
tt.cancelStresser() tt.pauseStresser()
plog.Infof("%s wait until cluster is healthy", tt.logPrefix()) plog.Infof("%s wait until cluster is healthy", tt.logPrefix())
if err := tt.cluster.WaitHealth(); err != nil { if err := tt.cluster.WaitHealth(); err != nil {
return fmt.Errorf("wait full health error: %v", err) return fmt.Errorf("wait full health error: %v", err)
@ -161,7 +161,7 @@ func (tt *tester) checkConsistency() (err error) {
} }
func (tt *tester) compact(rev int64, timeout time.Duration) (err error) { func (tt *tester) compact(rev int64, timeout time.Duration) (err error) {
tt.cancelStresser() tt.pauseStresser()
defer func() { defer func() {
if err == nil { if err == nil {
err = tt.startStresser() err = tt.startStresser()
@ -217,7 +217,7 @@ func (tt *tester) cleanup() error {
} }
caseFailedTotalCounter.WithLabelValues(desc).Inc() caseFailedTotalCounter.WithLabelValues(desc).Inc()
tt.cancelStresser() tt.closeStresser()
if err := tt.cluster.Cleanup(); err != nil { if err := tt.cluster.Cleanup(); err != nil {
plog.Warningf("%s cleanup error: %v", tt.logPrefix(), err) plog.Warningf("%s cleanup error: %v", tt.logPrefix(), err)
return err return err
@ -229,10 +229,10 @@ func (tt *tester) cleanup() error {
return tt.resetStressCheck() return tt.resetStressCheck()
} }
func (tt *tester) cancelStresser() { func (tt *tester) pauseStresser() {
plog.Infof("%s canceling the stressers...", tt.logPrefix()) plog.Infof("%s pausing the stressers...", tt.logPrefix())
tt.stresser.Cancel() tt.stresser.Pause()
plog.Infof("%s canceled stressers", tt.logPrefix()) plog.Infof("%s paused stressers", tt.logPrefix())
} }
func (tt *tester) startStresser() (err error) { func (tt *tester) startStresser() (err error) {
@ -242,6 +242,12 @@ func (tt *tester) startStresser() (err error) {
return err return err
} }
// closeStresser calls Close on the tester's stresser, logging before and
// after the call.
func (tt *tester) closeStresser() {
plog.Infof("%s closing the stressers...", tt.logPrefix())
tt.stresser.Close()
plog.Infof("%s closed stressers", tt.logPrefix())
}
func (tt *tester) resetStressCheck() error { func (tt *tester) resetStressCheck() error {
plog.Infof("%s resetting stressers and checkers...", tt.logPrefix()) plog.Infof("%s resetting stressers and checkers...", tt.logPrefix())
cs := &compositeStresser{} cs := &compositeStresser{}

View File

@ -93,11 +93,15 @@ func (s *v2Stresser) run(ctx context.Context, kv clientV2.KeysAPI) {
} }
} }
func (s *v2Stresser) Cancel() { func (s *v2Stresser) Pause() {
s.cancel() s.cancel()
s.wg.Wait() s.wg.Wait()
} }
// Close delegates to Pause.
func (s *v2Stresser) Close() {
s.Pause()
}
func (s *v2Stresser) ModifiedKeys() int64 { func (s *v2Stresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKey) return atomic.LoadInt64(&s.atomicModifiedKey)
} }