mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcd-tester: refactor
This commit is contained in:
parent
3bcd2b5b9f
commit
cc7dd9b729
@ -31,7 +31,7 @@ func main() {
|
|||||||
datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.")
|
datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.")
|
||||||
stressKeySize := flag.Int("stress-key-size", 100, "the size of each key written into etcd.")
|
stressKeySize := flag.Int("stress-key-size", 100, "the size of each key written into etcd.")
|
||||||
stressKeySuffixRange := flag.Int("stress-key-count", 250000, "the count of key range written into etcd.")
|
stressKeySuffixRange := flag.Int("stress-key-count", 250000, "the count of key range written into etcd.")
|
||||||
limit := flag.Int("limit", 3, "the limit of rounds to run failure set.")
|
limit := flag.Int("limit", -1, "the limit of rounds to run failure set (-1 to run without limits).")
|
||||||
schedCases := flag.String("schedule-cases", "", "test case schedule")
|
schedCases := flag.String("schedule-cases", "", "test case schedule")
|
||||||
consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
|
consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
|
||||||
isV2Only := flag.Bool("v2-only", false, "'true' to run V2 only tester.")
|
isV2Only := flag.Bool("v2-only", false, "'true' to run V2 only tester.")
|
||||||
|
57
tools/functional-tester/etcd-tester/status.go
Normal file
57
tools/functional-tester/etcd-tester/status.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
// Copyright 2016 CoreOS, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Status struct {
|
||||||
|
Since time.Time
|
||||||
|
Failures []string
|
||||||
|
RoundLimit int
|
||||||
|
|
||||||
|
Cluster ClusterStatus
|
||||||
|
cluster *cluster
|
||||||
|
|
||||||
|
mu sync.Mutex // guards Round and Case
|
||||||
|
Round int
|
||||||
|
Case int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Status) setRound(r int) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.Round = r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Status) getRound() int {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
return s.Round
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Status) setCase(c int) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.Case = c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Status) getCase() int {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
return s.Case
|
||||||
|
}
|
@ -15,7 +15,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -26,6 +26,7 @@ type tester struct {
|
|||||||
consistencyCheck bool
|
consistencyCheck bool
|
||||||
|
|
||||||
status Status
|
status Status
|
||||||
|
currentRevision int64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tt *tester) runLoop() {
|
func (tt *tester) runLoop() {
|
||||||
@ -35,201 +36,227 @@ func (tt *tester) runLoop() {
|
|||||||
for _, f := range tt.failures {
|
for _, f := range tt.failures {
|
||||||
tt.status.Failures = append(tt.status.Failures, f.Desc())
|
tt.status.Failures = append(tt.status.Failures, f.Desc())
|
||||||
}
|
}
|
||||||
for i := 0; i < tt.limit; i++ {
|
round := 0
|
||||||
tt.status.setRound(i)
|
for {
|
||||||
|
tt.status.setRound(round)
|
||||||
|
tt.status.setCase(-1) // -1 so that logPrefix doesn't print out 'case'
|
||||||
roundTotalCounter.Inc()
|
roundTotalCounter.Inc()
|
||||||
|
|
||||||
var (
|
var failed bool
|
||||||
currentRevision int64
|
|
||||||
failed bool
|
|
||||||
)
|
|
||||||
for j, f := range tt.failures {
|
for j, f := range tt.failures {
|
||||||
caseTotalCounter.WithLabelValues(f.Desc()).Inc()
|
caseTotalCounter.WithLabelValues(f.Desc()).Inc()
|
||||||
tt.status.setCase(j)
|
tt.status.setCase(j)
|
||||||
|
|
||||||
if err := tt.cluster.WaitHealth(); err != nil {
|
if err := tt.cluster.WaitHealth(); err != nil {
|
||||||
plog.Printf("[round#%d case#%d] wait full health error: %v", i, j, err)
|
plog.Printf("%s wait full health error: %v", tt.logPrefix(), err)
|
||||||
if err := tt.cleanup(i, j); err != nil {
|
if err := tt.cleanup(); err != nil {
|
||||||
plog.Printf("[round#%d case#%d] cleanup error: %v", i, j, err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
failed = true
|
failed = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
plog.Printf("[round#%d case#%d] start failure %s", i, j, f.Desc())
|
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] start injecting failure...", i, j)
|
plog.Printf("%s starting failure %s", tt.logPrefix(), f.Desc())
|
||||||
if err := f.Inject(tt.cluster, i); err != nil {
|
|
||||||
plog.Printf("[round#%d case#%d] injection error: %v", i, j, err)
|
plog.Printf("%s injecting failure...", tt.logPrefix())
|
||||||
if err := tt.cleanup(i, j); err != nil {
|
if err := f.Inject(tt.cluster, round); err != nil {
|
||||||
plog.Printf("[round#%d case#%d] cleanup error: %v", i, j, err)
|
plog.Printf("%s injection error: %v", tt.logPrefix(), err)
|
||||||
|
if err := tt.cleanup(); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
failed = true
|
failed = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
plog.Printf("[round#%d case#%d] injected failure", i, j)
|
plog.Printf("%s injected failure", tt.logPrefix())
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] start recovering failure...", i, j)
|
plog.Printf("%s recovering failure...", tt.logPrefix())
|
||||||
if err := f.Recover(tt.cluster, i); err != nil {
|
if err := f.Recover(tt.cluster, round); err != nil {
|
||||||
plog.Printf("[round#%d case#%d] recovery error: %v", i, j, err)
|
plog.Printf("%s recovery error: %v", tt.logPrefix(), err)
|
||||||
if err := tt.cleanup(i, j); err != nil {
|
if err := tt.cleanup(); err != nil {
|
||||||
plog.Printf("[round#%d case#%d] cleanup error: %v", i, j, err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
failed = true
|
failed = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
plog.Printf("[round#%d case#%d] recovered failure", i, j)
|
plog.Printf("%s recovered failure", tt.logPrefix())
|
||||||
|
|
||||||
if !tt.consistencyCheck {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if tt.cluster.v2Only {
|
if tt.cluster.v2Only {
|
||||||
plog.Printf("[round#%d case#%d] succeed!", i, j)
|
plog.Printf("%s succeed!", tt.logPrefix())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] canceling the stressers...", i, j)
|
var err error
|
||||||
|
failed, err = tt.updateCurrentRevisionHash(tt.consistencyCheck)
|
||||||
|
if err != nil {
|
||||||
|
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if failed {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
plog.Printf("%s succeed!", tt.logPrefix())
|
||||||
|
}
|
||||||
|
|
||||||
|
// -1 so that logPrefix doesn't print out 'case'
|
||||||
|
tt.status.setCase(-1)
|
||||||
|
|
||||||
|
if failed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
revToCompact := max(0, tt.currentRevision-10000)
|
||||||
|
if err := tt.compact(revToCompact); err != nil {
|
||||||
|
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if round > 0 && round%500 == 0 { // every 500 rounds
|
||||||
|
if err := tt.defrag(); err != nil {
|
||||||
|
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
round++
|
||||||
|
if round == tt.limit {
|
||||||
|
plog.Printf("%s functional-tester is finished", tt.logPrefix())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) logPrefix() string {
|
||||||
|
var (
|
||||||
|
rd = tt.status.getRound()
|
||||||
|
cs = tt.status.getCase()
|
||||||
|
prefix = fmt.Sprintf("[round#%d case#%d]", rd, cs)
|
||||||
|
)
|
||||||
|
if cs == -1 {
|
||||||
|
prefix = fmt.Sprintf("[round#%d]", rd)
|
||||||
|
}
|
||||||
|
return prefix
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) cleanup() error {
|
||||||
|
roundFailedTotalCounter.Inc()
|
||||||
|
caseFailedTotalCounter.WithLabelValues(tt.failures[tt.status.Case].Desc()).Inc()
|
||||||
|
|
||||||
|
plog.Printf("%s cleaning up...", tt.logPrefix())
|
||||||
|
if err := tt.cluster.Cleanup(); err != nil {
|
||||||
|
plog.Printf("%s cleanup error: %v", tt.logPrefix(), err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return tt.cluster.Bootstrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) cancelStressers() {
|
||||||
|
plog.Printf("%s canceling the stressers...", tt.logPrefix())
|
||||||
for _, s := range tt.cluster.Stressers {
|
for _, s := range tt.cluster.Stressers {
|
||||||
s.Cancel()
|
s.Cancel()
|
||||||
}
|
}
|
||||||
plog.Printf("[round#%d case#%d] canceled stressers", i, j)
|
plog.Printf("%s canceled stressers", tt.logPrefix())
|
||||||
|
}
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] checking current revisions...", i, j)
|
func (tt *tester) startStressers() {
|
||||||
|
plog.Printf("%s starting the stressers...", tt.logPrefix())
|
||||||
|
for _, s := range tt.cluster.Stressers {
|
||||||
|
go s.Stress()
|
||||||
|
}
|
||||||
|
plog.Printf("%s started stressers", tt.logPrefix())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) compact(rev int64) error {
|
||||||
|
plog.Printf("%s compacting storage at %d (current revision %d)", tt.logPrefix(), rev, tt.currentRevision)
|
||||||
|
if err := tt.cluster.compactKV(rev); err != nil {
|
||||||
|
plog.Printf("%s compactKV error (%v)", tt.logPrefix(), err)
|
||||||
|
if cerr := tt.cleanup(); cerr != nil {
|
||||||
|
return fmt.Errorf("%s, %s", err, cerr)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
plog.Printf("%s compacted storage at %d", tt.logPrefix(), rev)
|
||||||
|
|
||||||
|
plog.Printf("%s checking compaction at %d", tt.logPrefix(), rev)
|
||||||
|
if err := tt.cluster.checkCompact(rev); err != nil {
|
||||||
|
plog.Printf("%s checkCompact error (%v)", tt.logPrefix(), err)
|
||||||
|
if cerr := tt.cleanup(); cerr != nil {
|
||||||
|
return fmt.Errorf("%s, %s", err, cerr)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
plog.Printf("%s confirmed compaction at %d", tt.logPrefix(), rev)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) defrag() error {
|
||||||
|
tt.cancelStressers()
|
||||||
|
defer tt.startStressers()
|
||||||
|
|
||||||
|
plog.Printf("%s defragmenting...", tt.logPrefix())
|
||||||
|
if err := tt.cluster.defrag(); err != nil {
|
||||||
|
plog.Printf("%s defrag error (%v)", tt.logPrefix(), err)
|
||||||
|
if cerr := tt.cleanup(); cerr != nil {
|
||||||
|
return fmt.Errorf("%s, %s", err, cerr)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
plog.Printf("%s defragmented...", tt.logPrefix())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tt *tester) updateCurrentRevisionHash(check bool) (failed bool, err error) {
|
||||||
|
if check {
|
||||||
|
tt.cancelStressers()
|
||||||
|
defer tt.startStressers()
|
||||||
|
}
|
||||||
|
|
||||||
|
plog.Printf("%s updating current revisions...", tt.logPrefix())
|
||||||
var (
|
var (
|
||||||
revs map[string]int64
|
revs map[string]int64
|
||||||
hashes map[string]int64
|
hashes map[string]int64
|
||||||
rerr error
|
rerr error
|
||||||
ok bool
|
ok bool
|
||||||
)
|
)
|
||||||
for k := 0; k < 5; k++ {
|
for i := 0; i < 7; i++ {
|
||||||
time.Sleep(time.Second)
|
|
||||||
|
|
||||||
revs, hashes, rerr = tt.cluster.getRevisionHash()
|
revs, hashes, rerr = tt.cluster.getRevisionHash()
|
||||||
if rerr != nil {
|
if rerr != nil {
|
||||||
plog.Printf("[round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr)
|
plog.Printf("%s #%d failed to get current revisions (%v)", tt.logPrefix(), i, rerr)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if currentRevision, ok = getSameValue(revs); ok {
|
if tt.currentRevision, ok = getSameValue(revs); ok {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs)
|
plog.Printf("%s #%d inconsistent current revisions %+v", tt.logPrefix(), i, revs)
|
||||||
|
if !check {
|
||||||
|
break // if consistency check is false, just try once
|
||||||
}
|
}
|
||||||
|
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
}
|
||||||
|
plog.Printf("%s updated current revisions with %d", tt.logPrefix(), tt.currentRevision)
|
||||||
|
|
||||||
|
if !check {
|
||||||
|
failed = false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if !ok || rerr != nil {
|
if !ok || rerr != nil {
|
||||||
plog.Printf("[round#%d case#%d] checking current revisions failed [revisions: %v]", i, j, revs)
|
plog.Printf("%s checking current revisions failed [revisions: %v]", tt.logPrefix(), revs)
|
||||||
if err := tt.cleanup(i, j); err != nil {
|
failed = true
|
||||||
plog.Printf("[round#%d case#%d] cleanup error: %v", i, j, err)
|
err = tt.cleanup()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
failed = true
|
plog.Printf("%s all members are consistent with current revisions [revisions: %v]", tt.logPrefix(), revs)
|
||||||
break
|
|
||||||
}
|
|
||||||
plog.Printf("[round#%d case#%d] all members are consistent with current revisions [revisions: %v]", i, j, revs)
|
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] checking current storage hashes...", i, j)
|
plog.Printf("%s checking current storage hashes...", tt.logPrefix())
|
||||||
if _, ok = getSameValue(hashes); !ok {
|
if _, ok = getSameValue(hashes); !ok {
|
||||||
plog.Printf("[round#%d case#%d] checking current storage hashes failed [hashes: %v]", i, j, hashes)
|
plog.Printf("%s checking current storage hashes failed [hashes: %v]", tt.logPrefix(), hashes)
|
||||||
if err := tt.cleanup(i, j); err != nil {
|
|
||||||
plog.Printf("[round#%d case#%d] cleanup error: %v", i, j, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
failed = true
|
failed = true
|
||||||
break
|
err = tt.cleanup()
|
||||||
}
|
|
||||||
plog.Printf("[round#%d case#%d] all members are consistent with storage hashes", i, j)
|
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] restarting the stressers...", i, j)
|
|
||||||
for _, s := range tt.cluster.Stressers {
|
|
||||||
go s.Stress()
|
|
||||||
}
|
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] succeed!", i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
if failed {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
revToCompact := max(0, currentRevision-10000)
|
|
||||||
plog.Printf("[round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision)
|
|
||||||
if err := tt.cluster.compactKV(revToCompact); err != nil {
|
|
||||||
plog.Printf("[round#%d] compactKV error (%v)", i, err)
|
|
||||||
if err := tt.cleanup(i, 0); err != nil {
|
|
||||||
plog.Printf("[round#%d] cleanup error: %v", i, err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
continue
|
plog.Printf("%s all members are consistent with storage hashes", tt.logPrefix())
|
||||||
}
|
|
||||||
plog.Printf("[round#%d] compacted storage", i)
|
|
||||||
|
|
||||||
plog.Printf("[round#%d] check compaction at %d", i, revToCompact)
|
|
||||||
if err := tt.cluster.checkCompact(revToCompact); err != nil {
|
|
||||||
plog.Printf("[round#%d] checkCompact error (%v)", i, err)
|
|
||||||
if err := tt.cleanup(i, 0); err != nil {
|
|
||||||
plog.Printf("[round#%d] cleanup error: %v", i, err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
|
||||||
plog.Printf("[round#%d] confirmed compaction at %d", i, revToCompact)
|
|
||||||
|
|
||||||
if i > 0 && i%500 == 0 { // every 500 rounds
|
|
||||||
plog.Printf("[round#%d] canceling the stressers...", i)
|
|
||||||
for _, s := range tt.cluster.Stressers {
|
|
||||||
s.Cancel()
|
|
||||||
}
|
|
||||||
plog.Printf("[round#%d] canceled stressers", i)
|
|
||||||
|
|
||||||
plog.Printf("[round#%d] deframenting...", i)
|
|
||||||
if err := tt.cluster.defrag(); err != nil {
|
|
||||||
plog.Printf("[round#%d] defrag error (%v)", i, err)
|
|
||||||
if err := tt.cleanup(i, 0); err != nil {
|
|
||||||
plog.Printf("[round#%d] cleanup error: %v", i, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
plog.Printf("[round#%d] deframented...", i)
|
|
||||||
|
|
||||||
plog.Printf("[round#%d] restarting the stressers...", i)
|
|
||||||
for _, s := range tt.cluster.Stressers {
|
|
||||||
go s.Stress()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tt *tester) cleanup(i, j int) error {
|
|
||||||
roundFailedTotalCounter.Inc()
|
|
||||||
caseFailedTotalCounter.WithLabelValues(tt.failures[j].Desc()).Inc()
|
|
||||||
|
|
||||||
plog.Printf("[round#%d case#%d] cleaning up...", i, j)
|
|
||||||
if err := tt.cluster.Cleanup(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return tt.cluster.Bootstrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
type Status struct {
|
|
||||||
Since time.Time
|
|
||||||
Failures []string
|
|
||||||
RoundLimit int
|
|
||||||
|
|
||||||
Cluster ClusterStatus
|
|
||||||
cluster *cluster
|
|
||||||
|
|
||||||
mu sync.Mutex // guards Round and Case
|
|
||||||
Round int
|
|
||||||
Case int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Status) setRound(r int) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Round = r
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Status) setCase(c int) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
s.Case = c
|
|
||||||
}
|
|
||||||
|
@ -21,7 +21,7 @@ func getSameValue(vals map[string]int64) (int64, bool) {
|
|||||||
rv = v
|
rv = v
|
||||||
}
|
}
|
||||||
if rv != v {
|
if rv != v {
|
||||||
return -1, false
|
return rv, false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return rv, true
|
return rv, true
|
||||||
|
Loading…
x
Reference in New Issue
Block a user