diff --git a/tools/functional-tester/tester/cluster.go b/tools/functional-tester/tester/cluster.go
index 1edab0b9c..9d77eb3a6 100644
--- a/tools/functional-tester/tester/cluster.go
+++ b/tools/functional-tester/tester/cluster.go
@@ -19,6 +19,7 @@ import (
 	"errors"
 	"fmt"
 	"io/ioutil"
+	"math/rand"
 	"net/http"
 	"path/filepath"
 	"strings"
@@ -234,6 +235,41 @@ func NewCluster(logger *zap.Logger, fpath string) (*Cluster, error) {
 	}
 	go clus.serveTesterServer()
 
+	if err := clus.updateFailures(); err != nil {
+		return nil, err
+	}
+
+	clus.rateLimiter = rate.NewLimiter(
+		rate.Limit(int(clus.Tester.StressQPS)),
+		int(clus.Tester.StressQPS),
+	)
+	clus.updateStresserChecker()
+	return clus, nil
+}
+
+// serveTesterServer runs the tester HTTP server, logging when it starts and
+// when ListenAndServe returns. Any error other than http.ErrServerClosed is
+// logged at Fatal level.
+func (clus *Cluster) serveTesterServer() {
+	clus.logger.Info(
+		"started tester HTTP server",
+		zap.String("tester-address", clus.Tester.TesterAddr),
+	)
+	err := clus.testerHTTPServer.ListenAndServe()
+	clus.logger.Info(
+		"tester HTTP server returned",
+		zap.String("tester-address", clus.Tester.TesterAddr),
+		zap.Error(err),
+	)
+	if err != nil && err != http.ErrServerClosed {
+		clus.logger.Fatal("tester HTTP errored", zap.Error(err))
+	}
+}
+
+// updateFailures translates each name in clus.Tester.FailureCases into
+// entries appended to clus.failures. It returns an error for a name it does
+// not recognize, so a misspelled case is reported instead of silently skipped.
+func (clus *Cluster) updateFailures() error {
 	for _, cs := range clus.Tester.FailureCases {
 		switch cs {
 		case "KILL_ONE_FOLLOWER":
@@ -270,33 +306,72 @@ func NewCluster(logger *zap.Logger, fpath string) (*Cluster, error) {
 			clus.failures = append(clus.failures, newFailureNoOp())
 		case "EXTERNAL":
 			clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath))
 		default:
-			return nil, fmt.Errorf("unknown failure %q", cs)
+			return fmt.Errorf("unknown failure %q", cs)
 		}
 	}
-
-	clus.rateLimiter = rate.NewLimiter(
-		rate.Limit(int(clus.Tester.StressQPS)),
-		int(clus.Tester.StressQPS),
-	)
-	clus.updateStresserChecker()
-	return clus, nil
+	return nil
 }
 
-func (clus *Cluster) serveTesterServer() {
-	clus.logger.Info(
-		"started tester HTTP server",
-		zap.String("tester-address", clus.Tester.TesterAddr),
-	)
-	err := clus.testerHTTPServer.ListenAndServe()
-	clus.logger.Info(
-		"tester HTTP server returned",
-		zap.String("tester-address", clus.Tester.TesterAddr),
-		zap.Error(err),
-	)
-	if err != nil && err != http.ErrServerClosed {
-		clus.logger.Fatal("tester HTTP errored", zap.Error(err))
-	}
+// shuffleFailures replaces clus.failures with a reordered copy so that each
+// round can run the failure cases in a different sequence.
+//
+// Element i of the new slice is the old element at (stride*i + offset) % n,
+// where n is the number of failures, stride is coprime to n (see coprime) and
+// offset is random. Because gcd(stride, n) == 1 that mapping is a bijection on
+// [0, n), so every failure case is kept exactly once, and it is never the
+// identity permutation.
+func (clus *Cluster) shuffleFailures() {
+	n := len(clus.failures)
+	if n < 2 {
+		// Nothing to reorder, and rand.Intn panics when n == 0.
+		return
+	}
+
+	// Draw from a private generator instead of re-seeding the process-wide
+	// math/rand source on every round.
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	stride := coprime(n)
+	offset := r.Intn(n)
+	if stride == 1 && offset == 0 {
+		// coprime returns 1 only for n == 2, where offset 0 would leave the
+		// order unchanged; force the swap instead.
+		offset = 1
+	}
+
+	clus.logger.Info("shuffling test failure cases", zap.Int("total", n))
+	fs := make([]Failure, n)
+	for i := range fs {
+		fs[i] = clus.failures[(stride*i+offset)%n]
+	}
+	clus.failures = fs
+	clus.logger.Info("shuffled test failure cases", zap.Int("total", n))
+}
+
+// coprime returns a positive stride whose greatest common divisor with n is 1.
+// The search starts at n/2, but never below 2 because strides 0 and 1 would
+// give a constant mapping or a plain rotation, and takes the first match below
+// n. It returns 1 when there is no such candidate, which only happens for n <= 2.
+func coprime(n int) int {
+	start := n / 2
+	if start < 2 {
+		start = 2
+	}
+	for i := start; i < n; i++ {
+		if gcd(i, n) == 1 {
+			return i
+		}
+	}
+	return 1
+}
+
+// gcd returns the greatest common divisor of x and y, computed with the
+// Euclidean algorithm; gcd(x, 0) is x.
+func gcd(x, y int) int {
+	for y != 0 {
+		x, y = y, x%y
+	}
+	return x
 }
 
 func (clus *Cluster) updateStresserChecker() {
diff --git a/tools/functional-tester/tester/cluster_test.go b/tools/functional-tester/tester/cluster_test.go
index 331606d19..4822639d3 100644
--- a/tools/functional-tester/tester/cluster_test.go
+++ b/tools/functional-tester/tester/cluster_test.go
@@ -131,6 +131,7 @@ func Test_newCluster(t *testing.T) {
 				"DELAY_PEER_PORT_TX_RX_LEADER",
 				"DELAY_PEER_PORT_TX_RX_ALL",
 			},
+			FailureShuffle: true,
 			FailpointCommands: []string{`panic("etcd-tester")`},
 			RunnerExecPath: "/etcd-runner",
 			ExternalExecPath: "",
@@ -159,4 +160,47 @@ func Test_newCluster(t *testing.T) {
 	if !reflect.DeepEqual(exp, cfg) {
 		t.Fatalf("expected %+v, got %+v", exp, cfg)
 	}
+
+	cfg.logger = logger
+
+	if err := cfg.updateFailures(); err != nil {
+		t.Fatal(err)
+	}
+
+	// descs snapshots the current order of cfg.failures by description.
+	descs := func() []string {
+		ds := make([]string, len(cfg.failures))
+		for i, f := range cfg.failures {
+			ds[i] = f.Desc()
+		}
+		return ds
+	}
+	// tally counts each description so that two orders can be compared as
+	// multisets: a shuffle must neither drop nor duplicate a failure case.
+	tally := func(ds []string) map[string]int {
+		m := make(map[string]int, len(ds))
+		for _, d := range ds {
+			m[d]++
+		}
+		return m
+	}
+
+	fs1 := descs()
+	cfg.shuffleFailures()
+	fs2 := descs()
+	if reflect.DeepEqual(fs1, fs2) {
+		t.Fatalf("expected shuffled failure cases, got %q", fs2)
+	}
+	if !reflect.DeepEqual(tally(fs1), tally(fs2)) {
+		t.Fatalf("shuffle changed the set of failure cases from %q to %q", fs1, fs2)
+	}
+
+	cfg.shuffleFailures()
+	fs3 := descs()
+	if reflect.DeepEqual(fs2, fs3) {
+		t.Fatalf("expected reshuffled failure cases from %q, got %q", fs2, fs3)
+	}
+	if !reflect.DeepEqual(tally(fs2), tally(fs3)) {
+		t.Fatalf("reshuffle changed the set of failure cases from %q to %q", fs2, fs3)
+	}
 }
diff --git a/tools/functional-tester/tester/tester.go b/tools/functional-tester/tester/cluster_tester.go
similarity index 99%
rename from tools/functional-tester/tester/tester.go
rename to tools/functional-tester/tester/cluster_tester.go
index 86bb601dc..649c46308 100644
--- a/tools/functional-tester/tester/tester.go
+++ b/tools/functional-tester/tester/cluster_tester.go
@@ -106,6 +106,9 @@ func (clus *Cluster) StartTester() {
 }
 
 func (clus *Cluster) doRound(round int) error {
+	if clus.Tester.FailureShuffle {
+		clus.shuffleFailures()
+	}
 	for i, f := range clus.failures {
 		clus.cs = i
 
diff --git a/tools/functional-tester/tester/local-test.yaml b/tools/functional-tester/tester/local-test.yaml
index bc55c9d81..f832d29ae 100644
--- a/tools/functional-tester/tester/local-test.yaml
+++ b/tools/functional-tester/tester/local-test.yaml
@@ -98,9 +98,7 @@ tester-config:
   - DELAY_PEER_PORT_TX_RX_LEADER
   - DELAY_PEER_PORT_TX_RX_ALL
 
-  # TODO: shuffle
-  # fail-shuffle: true
-
+  failure-shuffle: true
   failpoint-commands:
   - panic("etcd-tester")
   # failpoint-commands: