diff --git a/tests/linearizability/linearizability_test.go b/tests/linearizability/linearizability_test.go
index 7c7bf75a6..bbcdd43a0 100644
--- a/tests/linearizability/linearizability_test.go
+++ b/tests/linearizability/linearizability_test.go
@@ -16,7 +16,6 @@ package linearizability
 
 import (
 	"context"
-	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
@@ -25,6 +24,8 @@ import (
 	"time"
 
 	"github.com/anishathalye/porcupine"
+	"github.com/google/go-cmp/cmp"
+	"golang.org/x/sync/errgroup"
 	"golang.org/x/time/rate"
 
 	"go.etcd.io/etcd/tests/v3/framework/e2e"
@@ -84,52 +85,65 @@ func TestLinearizability(t *testing.T) {
 	}
 	for _, tc := range tcs {
 		t.Run(tc.name, func(t *testing.T) {
-			failpoint := FailpointConfig{
+			ctx := context.Background()
+			clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&tc.config))
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer clus.Close()
+			operations, events := testLinearizability(ctx, t, clus, FailpointConfig{
 				failpoint:           tc.failpoint,
 				count:               1,
 				retries:             3,
 				waitBetweenTriggers: waitBetweenFailpointTriggers,
-			}
-			traffic := trafficConfig{
+			}, trafficConfig{
 				minimalQPS:  minimalQPS,
 				maximalQPS:  maximalQPS,
 				clientCount: 8,
 				traffic:     DefaultTraffic,
-			}
-			testLinearizability(context.Background(), t, tc.config, failpoint, traffic)
+			})
+			validateEventsMatch(t, events)
+			checkOperationsAndPersistResults(t, operations, clus)
 		})
 	}
 }
 
-func testLinearizability(ctx context.Context, t *testing.T, config e2e.EtcdProcessClusterConfig, failpoint FailpointConfig, traffic trafficConfig) {
-	clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&config))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer clus.Close()
-	ctx, cancel := context.WithCancel(ctx)
-	go func() {
-		defer cancel()
-		err := triggerFailpoints(ctx, t, clus, failpoint)
-		if err != nil {
-			t.Error(err)
-		}
-	}()
-	operations := simulateTraffic(ctx, t, clus, traffic)
-	err = clus.Stop()
-	if err != nil {
-		t.Error(err)
-	}
-	checkOperationsAndPersistResults(t, operations, clus)
+// testLinearizability runs failpoint injection, client traffic and watch collection
+// concurrently against clus and returns the recorded operations together with the watch
+// events gathered by collectClusterWatchEvents.
+func testLinearizability(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, failpoint FailpointConfig, traffic trafficConfig) (operations []porcupine.Operation, events [][]watchEvent) {
+	// The stages run in parallel; each one waits a second after finishing and then cancels the next stage's context to propagate the stop signal.
+	g := errgroup.Group{}
+	trafficCtx, trafficCancel := context.WithCancel(ctx)
+	g.Go(func() error {
+		triggerFailpoints(ctx, t, clus, failpoint)
+		time.Sleep(time.Second)
+		trafficCancel()
+		return nil
+	})
+	watchCtx, watchCancel := context.WithCancel(ctx)
+	g.Go(func() error {
+		operations = simulateTraffic(trafficCtx, t, clus, traffic)
+		time.Sleep(time.Second)
+		watchCancel()
+		return nil
+	})
+	g.Go(func() error {
+		events = collectClusterWatchEvents(watchCtx, t, clus)
+		return nil
+	})
+	g.Wait()
+	return operations, events
 }
 
-func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) error {
+func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) {
 	var err error
 	successes := 0
 	failures := 0
 	for _, proc := range clus.Procs {
 		if !config.failpoint.Available(proc) {
-			return fmt.Errorf("failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name)
+			t.Errorf("Failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name)
+			return
 		}
 	}
 	for successes < config.count && failures < config.retries {
@@ -143,10 +157,8 @@ func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessC
 		successes++
 	}
 	if successes < config.count || failures >= config.retries {
-		return fmt.Errorf("failed to trigger failpoints enough times, err: %v", err)
+		t.Errorf("failed to trigger failpoints enough times, err: %v", err)
 	}
-	time.Sleep(config.waitBetweenTriggers)
-	return nil
 }
 
 type FailpointConfig struct {
@@ -203,6 +215,32 @@ type trafficConfig struct {
 	traffic     Traffic
 }
 
+// validateEventsMatch reports a test error for every watch history in ops that differs from
+// the same-length prefix of the longest history. An empty ops is accepted and checks nothing.
+// The longest history is swapped into ops[0], so the caller's slice is reordered in place.
+func validateEventsMatch(t *testing.T, ops [][]watchEvent) {
+	// Nothing to compare; without this guard ops[0] below panics on an empty slice.
+	if len(ops) == 0 {
+		return
+	}
+	// Move the longest history to ops[0] so every other history can be sliced against it.
+	maxLength := len(ops[0])
+	for i := 1; i < len(ops); i++ {
+		if len(ops[i]) > maxLength {
+			maxLength = len(ops[i])
+			ops[0], ops[i] = ops[i], ops[0]
+		}
+	}
+
+	for i := 1; i < len(ops); i++ {
+		length := len(ops[i])
+		// Compare only a prefix of the watch events, as we are not guaranteed to collect all events from each node.
+		if diff := cmp.Diff(ops[0][:length], ops[i][:length]); diff != "" {
+			t.Errorf("Events in watches do not match, %s", diff)
+		}
+	}
+}
+
 func checkOperationsAndPersistResults(t *testing.T, operations []porcupine.Operation, clus *e2e.EtcdProcessCluster) {
 	path, err := testResultsDirectory(t)
 	if err != nil {
diff --git a/tests/linearizability/watch.go b/tests/linearizability/watch.go
new file mode 100644
index 000000000..c0c279e81
--- /dev/null
+++ b/tests/linearizability/watch.go
@@ -0,0 +1,97 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linearizability
+
+import (
+	"context"
+	"sync"
+	"testing"
+	"time"
+
+	"go.uber.org/zap"
+
+	"go.etcd.io/etcd/api/v3/mvccpb"
+	clientv3 "go.etcd.io/etcd/client/v3"
+	"go.etcd.io/etcd/tests/v3/framework/e2e"
+)
+
+// collectClusterWatchEvents starts one watch client per member of clus and returns the events
+// each member delivered until ctx was done, indexed like clus.Procs (nil if its client failed).
+func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) [][]watchEvent {
+	var wg sync.WaitGroup
+	memberEvents := make([][]watchEvent, len(clus.Procs))
+	for i, member := range clus.Procs {
+		c, err := clientv3.New(clientv3.Config{
+			Endpoints:            member.EndpointsV3(),
+			Logger:               zap.NewNop(),
+			DialKeepAliveTime:    1 * time.Millisecond,
+			DialKeepAliveTimeout: 5 * time.Millisecond,
+		})
+		if err != nil {
+			// Callers run this off the test goroutine, where t.Fatal (FailNow) must not be used.
+			t.Errorf("creating watch client for %s: %v", member.Config().Name, err)
+			continue
+		}
+		wg.Add(1)
+		go func(i int, c *clientv3.Client) {
+			defer wg.Done()
+			defer c.Close()
+			// Each goroutine writes only its own element and wg.Wait orders the writes, so no lock is needed.
+			memberEvents[i] = collectMemberWatchEvents(ctx, t, c)
+		}(i, c)
+	}
+	wg.Wait()
+	return memberEvents
+}
+
+// collectMemberWatchEvents watches every key through c, starting at revision 1, until ctx is
+// done and returns the observed events. A watch whose channel closes early is started again.
+func collectMemberWatchEvents(ctx context.Context, t *testing.T, c *clientv3.Client) []watchEvent {
+	events := []watchEvent{}
+	var lastRevision int64
+	for ctx.Err() == nil {
+		// Resume one past the last recorded revision so a restarted watch neither replays nor skips events.
+		for resp := range c.Watch(ctx, "", clientv3.WithPrefix(), clientv3.WithRev(lastRevision+1)) {
+			for _, event := range resp.Events {
+				var op OperationType
+				switch event.Type {
+				case mvccpb.PUT:
+					op = Put
+				case mvccpb.DELETE:
+					op = Delete
+				}
+				events = append(events, watchEvent{
+					Op:       op,
+					Key:      string(event.Kv.Key),
+					Value:    string(event.Kv.Value),
+					Revision: event.Kv.ModRevision,
+				})
+				lastRevision = event.Kv.ModRevision
+			}
+			if resp.Err() != nil {
+				t.Logf("Watch error: %v", resp.Err())
+			}
+		}
+	}
+	return events
+}
+
+// watchEvent is a single key change observed through a watch; Revision is the key's ModRevision.
+type watchEvent struct {
+	Op       OperationType
+	Key      string
+	Value    string
+	Revision int64
+}