mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
284 lines
8.0 KiB
Go
284 lines
8.0 KiB
Go
// Copyright 2022 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package robustness
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/anishathalye/porcupine"
|
|
"go.uber.org/zap"
|
|
"go.uber.org/zap/zaptest"
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"go.etcd.io/etcd/api/v3/version"
|
|
"go.etcd.io/etcd/tests/v3/framework/e2e"
|
|
"go.etcd.io/etcd/tests/v3/robustness/model"
|
|
)
|
|
|
|
const (
|
|
// waitBetweenFailpointTriggers
|
|
waitBetweenFailpointTriggers = time.Second
|
|
)
|
|
|
|
var (
|
|
LowTraffic = trafficConfig{
|
|
name: "LowTraffic",
|
|
minimalQPS: 100,
|
|
maximalQPS: 200,
|
|
clientCount: 8,
|
|
requestProgress: false,
|
|
traffic: traffic{
|
|
keyCount: 10,
|
|
leaseTTL: DefaultLeaseTTL,
|
|
largePutSize: 32769,
|
|
writes: []requestChance{
|
|
{operation: Put, chance: 45},
|
|
{operation: LargePut, chance: 5},
|
|
{operation: Delete, chance: 10},
|
|
{operation: MultiOpTxn, chance: 10},
|
|
{operation: PutWithLease, chance: 10},
|
|
{operation: LeaseRevoke, chance: 10},
|
|
{operation: CompareAndSet, chance: 10},
|
|
},
|
|
},
|
|
}
|
|
HighTraffic = trafficConfig{
|
|
name: "HighTraffic",
|
|
minimalQPS: 200,
|
|
maximalQPS: 1000,
|
|
clientCount: 12,
|
|
requestProgress: false,
|
|
traffic: traffic{
|
|
keyCount: 10,
|
|
largePutSize: 32769,
|
|
leaseTTL: DefaultLeaseTTL,
|
|
writes: []requestChance{
|
|
{operation: Put, chance: 85},
|
|
{operation: MultiOpTxn, chance: 10},
|
|
{operation: LargePut, chance: 5},
|
|
},
|
|
},
|
|
}
|
|
ReqProgTraffic = trafficConfig{
|
|
name: "RequestProgressTraffic",
|
|
minimalQPS: 200,
|
|
maximalQPS: 1000,
|
|
clientCount: 12,
|
|
requestProgress: true,
|
|
traffic: traffic{
|
|
keyCount: 10,
|
|
largePutSize: 8196,
|
|
leaseTTL: DefaultLeaseTTL,
|
|
writes: []requestChance{
|
|
{operation: Put, chance: 95},
|
|
{operation: LargePut, chance: 5},
|
|
},
|
|
},
|
|
}
|
|
defaultTraffic = LowTraffic
|
|
trafficList = []trafficConfig{
|
|
LowTraffic, HighTraffic,
|
|
}
|
|
)
|
|
|
|
func TestRobustness(t *testing.T) {
|
|
testRunner.BeforeTest(t)
|
|
v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd)
|
|
if err != nil {
|
|
t.Fatalf("Failed checking etcd version binary, binary: %q, err: %v", e2e.BinPath.Etcd, err)
|
|
}
|
|
type scenario struct {
|
|
name string
|
|
failpoint Failpoint
|
|
config e2e.EtcdProcessClusterConfig
|
|
traffic *trafficConfig
|
|
}
|
|
scenarios := []scenario{}
|
|
for _, traffic := range trafficList {
|
|
scenarios = append(scenarios, scenario{
|
|
name: "ClusterOfSize1/" + traffic.name,
|
|
failpoint: RandomOneNodeClusterFailpoint,
|
|
traffic: &traffic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithClusterSize(1),
|
|
e2e.WithSnapshotCount(100),
|
|
e2e.WithGoFailEnabled(true),
|
|
e2e.WithCompactionBatchLimit(100), // required for compactBeforeCommitBatch and compactAfterCommitBatch failpoints
|
|
e2e.WithWatchProcessNotifyInterval(100*time.Millisecond),
|
|
),
|
|
})
|
|
scenarios = append(scenarios, scenario{
|
|
name: "ClusterOfSize3/" + traffic.name,
|
|
failpoint: RandomMultiNodeClusterFailpoint,
|
|
traffic: &traffic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithIsPeerTLS(true),
|
|
e2e.WithSnapshotCount(100),
|
|
e2e.WithPeerProxy(true),
|
|
e2e.WithGoFailEnabled(true),
|
|
e2e.WithCompactionBatchLimit(100), // required for compactBeforeCommitBatch and compactAfterCommitBatch failpoints
|
|
e2e.WithWatchProcessNotifyInterval(100*time.Millisecond),
|
|
),
|
|
})
|
|
}
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Issue14370",
|
|
failpoint: RaftBeforeSavePanic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithClusterSize(1),
|
|
e2e.WithGoFailEnabled(true),
|
|
),
|
|
})
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Issue14685",
|
|
failpoint: DefragBeforeCopyPanic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithClusterSize(1),
|
|
e2e.WithGoFailEnabled(true),
|
|
),
|
|
})
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Issue13766",
|
|
failpoint: KillFailpoint,
|
|
traffic: &HighTraffic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithSnapshotCount(100),
|
|
),
|
|
})
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Issue15220",
|
|
failpoint: RandomOneNodeClusterFailpoint,
|
|
traffic: &ReqProgTraffic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithClusterSize(1),
|
|
),
|
|
})
|
|
if v.Compare(version.V3_5) >= 0 {
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Issue15271",
|
|
failpoint: BlackholeUntilSnapshot,
|
|
traffic: &HighTraffic,
|
|
config: *e2e.NewConfig(
|
|
e2e.WithSnapshotCount(100),
|
|
e2e.WithPeerProxy(true),
|
|
e2e.WithIsPeerTLS(true),
|
|
),
|
|
})
|
|
}
|
|
snapshotOptions := []e2e.EPClusterOption{
|
|
e2e.WithGoFailEnabled(true),
|
|
e2e.WithSnapshotCount(100),
|
|
e2e.WithPeerProxy(true),
|
|
e2e.WithIsPeerTLS(true),
|
|
}
|
|
if v.Compare(version.V3_6) >= 0 {
|
|
snapshotOptions = append(snapshotOptions, e2e.WithSnapshotCatchUpEntries(100))
|
|
}
|
|
scenarios = append(scenarios, scenario{
|
|
name: "Snapshot",
|
|
failpoint: RandomSnapshotFailpoint,
|
|
traffic: &HighTraffic,
|
|
config: *e2e.NewConfig(snapshotOptions...),
|
|
})
|
|
for _, scenario := range scenarios {
|
|
if scenario.traffic == nil {
|
|
scenario.traffic = &defaultTraffic
|
|
}
|
|
|
|
t.Run(scenario.name, func(t *testing.T) {
|
|
lg := zaptest.NewLogger(t)
|
|
scenario.config.Logger = lg
|
|
ctx := context.Background()
|
|
testRobustness(ctx, t, lg, scenario.config, scenario.traffic, FailpointConfig{
|
|
failpoint: scenario.failpoint,
|
|
count: 1,
|
|
retries: 3,
|
|
waitBetweenTriggers: waitBetweenFailpointTriggers,
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, config e2e.EtcdProcessClusterConfig, traffic *trafficConfig, failpoint FailpointConfig) {
|
|
r := report{lg: lg}
|
|
var err error
|
|
r.clus, err = e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&config))
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer r.clus.Close()
|
|
|
|
defer func() {
|
|
r.Report(t)
|
|
}()
|
|
r.operations, r.responses = runScenario(ctx, t, lg, r.clus, *traffic, failpoint)
|
|
forcestopCluster(r.clus)
|
|
|
|
watchProgressNotifyEnabled := r.clus.Cfg.WatchProcessNotifyInterval != 0
|
|
validateWatchResponses(t, r.clus, r.responses, traffic.requestProgress || watchProgressNotifyEnabled)
|
|
|
|
r.events = watchEvents(r.responses)
|
|
validateEventsMatch(t, r.events)
|
|
|
|
r.patchedOperations = patchOperationBasedOnWatchEvents(r.operations, longestHistory(r.events))
|
|
r.visualizeHistory = model.ValidateOperationHistoryAndReturnVisualize(t, lg, r.patchedOperations)
|
|
}
|
|
|
|
func runScenario(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster, traffic trafficConfig, failpoint FailpointConfig) (operations []porcupine.Operation, responses [][]watchResponse) {
|
|
g := errgroup.Group{}
|
|
finishTraffic := make(chan struct{})
|
|
|
|
g.Go(func() error {
|
|
defer close(finishTraffic)
|
|
triggerFailpoints(ctx, t, lg, clus, failpoint)
|
|
time.Sleep(time.Second)
|
|
return nil
|
|
})
|
|
maxRevisionChan := make(chan int64, 1)
|
|
g.Go(func() error {
|
|
defer close(maxRevisionChan)
|
|
operations = simulateTraffic(ctx, t, lg, clus, traffic, finishTraffic)
|
|
maxRevisionChan <- operationsMaxRevision(operations)
|
|
return nil
|
|
})
|
|
g.Go(func() error {
|
|
responses = collectClusterWatchEvents(ctx, t, clus, maxRevisionChan, traffic.requestProgress)
|
|
return nil
|
|
})
|
|
g.Wait()
|
|
return operations, responses
|
|
}
|
|
|
|
func operationsMaxRevision(operations []porcupine.Operation) int64 {
|
|
var maxRevision int64
|
|
for _, op := range operations {
|
|
revision := op.Output.(model.EtcdResponse).Revision
|
|
if revision > maxRevision {
|
|
maxRevision = revision
|
|
}
|
|
}
|
|
return maxRevision
|
|
}
|
|
|
|
// forcestopCluster stops the etcd member with signal kill.
|
|
func forcestopCluster(clus *e2e.EtcdProcessCluster) error {
|
|
for _, member := range clus.Procs {
|
|
member.Kill()
|
|
}
|
|
return clus.Stop()
|
|
}
|