Merge pull request #15044 from serathius/linearizability-watch

Watch events during linearizability test and compare history
This commit is contained in:
Marek Siarkowicz 2023-01-10 17:59:04 +01:00 committed by GitHub
commit ff898640a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 156 additions and 31 deletions

View File

@ -16,7 +16,6 @@ package linearizability
import ( import (
"context" "context"
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -25,6 +24,8 @@ import (
"time" "time"
"github.com/anishathalye/porcupine" "github.com/anishathalye/porcupine"
"github.com/google/go-cmp/cmp"
"golang.org/x/sync/errgroup"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"go.etcd.io/etcd/tests/v3/framework/e2e" "go.etcd.io/etcd/tests/v3/framework/e2e"
@ -84,52 +85,62 @@ func TestLinearizability(t *testing.T) {
} }
for _, tc := range tcs { for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
failpoint := FailpointConfig{ ctx := context.Background()
clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&tc.config))
if err != nil {
t.Fatal(err)
}
defer clus.Close()
operations, events := testLinearizability(ctx, t, clus, FailpointConfig{
failpoint: tc.failpoint, failpoint: tc.failpoint,
count: 1, count: 1,
retries: 3, retries: 3,
waitBetweenTriggers: waitBetweenFailpointTriggers, waitBetweenTriggers: waitBetweenFailpointTriggers,
} }, trafficConfig{
traffic := trafficConfig{
minimalQPS: minimalQPS, minimalQPS: minimalQPS,
maximalQPS: maximalQPS, maximalQPS: maximalQPS,
clientCount: 8, clientCount: 8,
traffic: DefaultTraffic, traffic: DefaultTraffic,
} })
testLinearizability(context.Background(), t, tc.config, failpoint, traffic) validateEventsMatch(t, events)
checkOperationsAndPersistResults(t, operations, clus)
}) })
} }
} }
func testLinearizability(ctx context.Context, t *testing.T, config e2e.EtcdProcessClusterConfig, failpoint FailpointConfig, traffic trafficConfig) { func testLinearizability(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, failpoint FailpointConfig, traffic trafficConfig) (operations []porcupine.Operation, events [][]watchEvent) {
clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&config)) // Run multiple test components (traffic, failpoints, etc) in parallel and use canceling context to propagate stop signal.
if err != nil { g := errgroup.Group{}
t.Fatal(err) trafficCtx, trafficCancel := context.WithCancel(ctx)
} g.Go(func() error {
defer clus.Close() triggerFailpoints(ctx, t, clus, failpoint)
ctx, cancel := context.WithCancel(ctx) time.Sleep(time.Second)
go func() { trafficCancel()
defer cancel() return nil
err := triggerFailpoints(ctx, t, clus, failpoint) })
if err != nil { watchCtx, watchCancel := context.WithCancel(ctx)
t.Error(err) g.Go(func() error {
} operations = simulateTraffic(trafficCtx, t, clus, traffic)
}() time.Sleep(time.Second)
operations := simulateTraffic(ctx, t, clus, traffic) watchCancel()
err = clus.Stop() return nil
if err != nil { })
t.Error(err) g.Go(func() error {
} events = collectClusterWatchEvents(watchCtx, t, clus)
checkOperationsAndPersistResults(t, operations, clus) return nil
})
g.Wait()
return operations, events
} }
func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) error { func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) {
var err error var err error
successes := 0 successes := 0
failures := 0 failures := 0
for _, proc := range clus.Procs { for _, proc := range clus.Procs {
if !config.failpoint.Available(proc) { if !config.failpoint.Available(proc) {
return fmt.Errorf("failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name) t.Errorf("Failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name)
return
} }
} }
for successes < config.count && failures < config.retries { for successes < config.count && failures < config.retries {
@ -143,10 +154,8 @@ func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessC
successes++ successes++
} }
if successes < config.count || failures >= config.retries { if successes < config.count || failures >= config.retries {
return fmt.Errorf("failed to trigger failpoints enough times, err: %v", err) t.Errorf("failed to trigger failpoints enough times, err: %v", err)
} }
time.Sleep(config.waitBetweenTriggers)
return nil
} }
type FailpointConfig struct { type FailpointConfig struct {
@ -203,6 +212,25 @@ type trafficConfig struct {
traffic Traffic traffic Traffic
} }
// validateEventsMatch verifies that every member observed the same sequence of
// watch events. Event collection may stop before a member has received its full
// history, so only the common prefix of each history is compared against the
// longest one. Reports a test error (with a diff) on any mismatch.
func validateEventsMatch(t *testing.T, ops [][]watchEvent) {
	// Guard against an empty cluster result; ops[0] below would panic otherwise.
	if len(ops) == 0 {
		return
	}
	// Move longest history to ops[0] so every other history is a prefix candidate.
	maxLength := len(ops[0])
	for i := 1; i < len(ops); i++ {
		if len(ops[i]) > maxLength {
			maxLength = len(ops[i])
			ops[0], ops[i] = ops[i], ops[0]
		}
	}
	for i := 1; i < len(ops); i++ {
		length := len(ops[i])
		// We compare prefix of watch events, as we are not guaranteed to collect all events from each node.
		if diff := cmp.Diff(ops[0][:length], ops[i][:length]); diff != "" {
			t.Errorf("Events in watches do not match, %s", diff)
		}
	}
}
func checkOperationsAndPersistResults(t *testing.T, operations []porcupine.Operation, clus *e2e.EtcdProcessCluster) { func checkOperationsAndPersistResults(t *testing.T, operations []porcupine.Operation, clus *e2e.EtcdProcessCluster) {
path, err := testResultsDirectory(t) path, err := testResultsDirectory(t)
if err != nil { if err != nil {

View File

@ -0,0 +1,97 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linearizability
import (
"context"
"sync"
"testing"
"time"
"go.uber.org/zap"
"go.etcd.io/etcd/api/v3/mvccpb"
clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/tests/v3/framework/e2e"
)
// collectClusterWatchEvents opens one watch client per cluster member and
// gathers each member's watch events concurrently until ctx is canceled.
// The returned slice is indexed in the same order as clus.Procs.
func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) [][]watchEvent {
	var (
		mu      sync.Mutex
		wg      sync.WaitGroup
		results = make([][]watchEvent, len(clus.Procs))
	)
	for idx, member := range clus.Procs {
		client, err := clientv3.New(clientv3.Config{
			Endpoints:            member.EndpointsV3(),
			Logger:               zap.NewNop(),
			DialKeepAliveTime:    1 * time.Millisecond,
			DialKeepAliveTimeout: 5 * time.Millisecond,
		})
		if err != nil {
			t.Fatal(err)
		}
		wg.Add(1)
		// Pass idx and client explicitly to avoid loop-variable capture.
		go func(idx int, client *clientv3.Client) {
			defer wg.Done()
			defer client.Close()
			collected := collectMemberWatchEvents(ctx, t, client)
			mu.Lock()
			results[idx] = collected
			mu.Unlock()
		}(idx, client)
	}
	wg.Wait()
	return results
}
// collectMemberWatchEvents watches all keys on a single member and records
// every PUT/DELETE event until ctx is canceled, re-establishing the watch
// whenever the server closes the stream. Returns the events in arrival order.
func collectMemberWatchEvents(ctx context.Context, t *testing.T, c *clientv3.Client) []watchEvent {
	events := []watchEvent{}
	// Revision to resume the watch from after a stream is closed.
	var lastRevision int64 = 1
	for {
		// Non-blocking cancellation check between watch streams.
		select {
		case <-ctx.Done():
			return events
		default:
		}
		// Watch all keys ("" with prefix) starting from lastRevision.
		for resp := range c.Watch(ctx, "", clientv3.WithPrefix(), clientv3.WithRev(lastRevision)) {
			// NOTE(review): resuming from resp.Header.Revision may re-deliver
			// events already appended below after a stream restart — confirm
			// whether this should track the last event's ModRevision+1 instead.
			lastRevision = resp.Header.Revision
			for _, event := range resp.Events {
				var op OperationType
				switch event.Type {
				case mvccpb.PUT:
					op = Put
				case mvccpb.DELETE:
					op = Delete
				}
				events = append(events, watchEvent{
					Op:       op,
					Key:      string(event.Kv.Key),
					Value:    string(event.Kv.Value),
					Revision: event.Kv.ModRevision,
				})
			}
			// Watch errors are logged, not fatal: the outer loop reconnects.
			if resp.Err() != nil {
				t.Logf("Watch error: %v", resp.Err())
			}
		}
	}
}
// watchEvent is a single observed watch notification, used to compare the
// event histories seen by different cluster members.
type watchEvent struct {
	Op       OperationType // Put or Delete, derived from the mvccpb event type
	Key      string        // key the event applies to
	Value    string        // value at the event's revision (empty for deletes)
	Revision int64         // ModRevision of the key at this event
}