diff --git a/tools/functional-tester/etcd-tester/checks.go b/tools/functional-tester/etcd-tester/checks.go index 1d3d8342a..d7f450374 100644 --- a/tools/functional-tester/etcd-tester/checks.go +++ b/tools/functional-tester/etcd-tester/checks.go @@ -46,9 +46,8 @@ func (hc *hashChecker) Check() (err error) { hashes map[string]int64 ok bool ) - for i := 0; i < 7; i++ { - time.Sleep(time.Second) - + // retry in case of transient failure + for i := 0; i < 3; i++ { revs, hashes, err = hc.hrg.getRevisionHash() if err != nil { plog.Printf("#%d failed to get current revisions (%v)", i, err) @@ -59,6 +58,7 @@ func (hc *hashChecker) Check() (err error) { } plog.Printf("#%d inconsistent current revisions %+v", i, revs) + time.Sleep(time.Second) } if !ok || err != nil { return fmt.Errorf("checking current revisions failed [err: %v, revisions: %v]", err, revs) diff --git a/tools/functional-tester/etcd-tester/tester.go b/tools/functional-tester/etcd-tester/tester.go index c439e6251..2d038aef3 100644 --- a/tools/functional-tester/etcd-tester/tester.go +++ b/tools/functional-tester/etcd-tester/tester.go @@ -104,6 +104,11 @@ func (tt *tester) doRound(round int) (bool, error) { plog.Printf("%s recovery error: %v", tt.logPrefix(), err) return false, nil } + plog.Printf("%s wait until cluster is healthy", tt.logPrefix()) + if err := tt.cluster.WaitHealth(); err != nil { + plog.Printf("%s wait full health error: %v", tt.logPrefix(), err) + return false, nil + } plog.Printf("%s recovered failure", tt.logPrefix()) if err := tt.checkConsistency(); err != nil {