Marek Siarkowicz 7181c7532f Relax assumptions about all client request persisted in WAL to only require first and last request to be persisted
This assumption is not true during durability issues like #14370.
In reality we want to avoid situations where the WAL was truncated; for
that it is enough to ensure that the first and last operations are
present.

Found it when running `make test-robustness-issue14370` and instead of
getting `Model is not linearizable` I got that assumptions were broken.

Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
2024-05-08 10:40:38 +02:00

203 lines
6.0 KiB
Go

// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package validate
import (
"encoding/json"
"fmt"
"testing"
"time"
"github.com/anishathalye/porcupine"
"go.uber.org/zap"
"go.etcd.io/etcd/tests/v3/robustness/model"
"go.etcd.io/etcd/tests/v3/robustness/report"
)
// ValidateAndReturnVisualize validates client reports against the persisted
// requests and returns a function that writes the linearization visualization.
// A function is returned, rather than the underlying data, because
// porcupine.linearizationInfo used to generate the visualization is private.
//
// Validation proceeds in stages: the validation assumptions are checked first
// (the test is aborted via t.Fatalf if they are broken), then the patched
// operation history is checked for linearizability. If that check does not
// yield porcupine.Ok, the failure is reported and the remaining stages are
// skipped. Otherwise the watch history and serializable operations are
// validated as well.
func ValidateAndReturnVisualize(t *testing.T, lg *zap.Logger, cfg Config, reports []report.ClientReport, persistedRequests []model.EtcdRequest, timeout time.Duration) (visualize func(basepath string) error) {
	if assumptionErr := checkValidationAssumptions(reports, persistedRequests); assumptionErr != nil {
		t.Fatalf("Broken validation assumptions: %s", assumptionErr)
	}

	operations := patchedOperationHistory(reports, persistedRequests)
	result, visualize := validateLinearizableOperationsAndVisualize(lg, operations, timeout)
	if result != porcupine.Ok {
		t.Error("Failed linearization, skipping further validation")
		return visualize
	}

	// TODO: Use requests from linearization instead of persisted requests from WAL.
	if watchErr := validateWatch(lg, cfg, reports, persistedRequests); watchErr != nil {
		t.Errorf("Failed validating watch history, err: %s", watchErr)
	}
	validateSerializableOperations(t, lg, operations, persistedRequests)
	return visualize
}
// Config carries validation options. ValidateAndReturnVisualize forwards it
// to validateWatch.
type Config struct {
// ExpectRevisionUnique presumably tells watch validation to expect each
// revision to be unique — TODO confirm against validateWatch.
ExpectRevisionUnique bool
}
// checkValidationAssumptions runs every precondition check that validation
// relies on, in a fixed order, and returns the first error encountered:
// unique puts, empty database at start, persisted requests matching client
// requests, and non-concurrent requests per client.
func checkValidationAssumptions(reports []report.ClientReport, persistedRequests []model.EtcdRequest) error {
	if err := validatePutOperationUnique(reports); err != nil {
		return err
	}
	if err := validateEmptyDatabaseAtStart(reports); err != nil {
		return err
	}
	if err := validatePersistedRequestMatchClientRequests(reports, persistedRequests); err != nil {
		return err
	}
	return validateNonConcurrentClientRequests(reports)
}
// validatePutOperationUnique verifies that no two put operations issued inside
// client transactions write the same key/value pair. Both the success and the
// failure branch of every Txn request are inspected; non-Txn requests are
// ignored. It returns an error naming the first duplicated pair, because
// uniqueness is required to patch the operation history.
func validatePutOperationUnique(reports []report.ClientReport) error {
	type KV struct {
		Key   string
		Value model.ValueOrHash
	}
	putValue := map[KV]struct{}{}
	for _, r := range reports {
		for _, op := range r.KeyValue {
			request := op.Input.(model.EtcdRequest)
			if request.Type != model.Txn {
				continue
			}
			// Cap the capacity of the success branch before appending so that
			// append always copies into a fresh array. Appending to the slice
			// directly could write the failure operations into spare capacity
			// of the backing array owned by the recorded request.
			onSuccess := request.Txn.OperationsOnSuccess
			txnOps := append(onSuccess[:len(onSuccess):len(onSuccess)], request.Txn.OperationsOnFailure...)
			for _, txnOp := range txnOps {
				if txnOp.Type != model.PutOperation {
					continue
				}
				kv := KV{
					Key:   txnOp.Put.Key,
					Value: txnOp.Put.Value,
				}
				if _, ok := putValue[kv]; ok {
					return fmt.Errorf("non unique put %v, required to patch operation history", kv)
				}
				putValue[kv] = struct{}{}
			}
		}
	}
	return nil
}
// validateEmptyDatabaseAtStart looks for a non-read client operation whose
// response carries revision 2 and returns nil as soon as one is found.
// If no such operation exists it returns an error, since the model
// implementation requires the database to be empty at start and the first
// write to succeed.
func validateEmptyDatabaseAtStart(reports []report.ClientReport) error {
	for i := range reports {
		for _, operation := range reports[i].KeyValue {
			req := operation.Input.(model.EtcdRequest)
			resp := operation.Output.(model.MaybeEtcdResponse)
			if resp.Revision != 2 || req.IsRead() {
				continue
			}
			return nil
		}
	}
	return fmt.Errorf("non empty database at start or first write didn't succeed, required by model implementation")
}
// validatePersistedRequestMatchClientRequests checks that the persisted
// requests are consistent with the requests reported by clients. Requests are
// compared by their JSON encoding.
//
// Two properties are verified:
//   - every persisted request, other than a LeaseGrant, was sent by a client;
//   - the first and the last successful client writes (by call time) are
//     among the persisted requests.
//
// It returns an error for the first violated property, when no successful
// client write exists at all, or when a request cannot be marshalled.
func validatePersistedRequestMatchClientRequests(reports []report.ClientReport, persistedRequests []model.EtcdRequest) error {
	persistedRequestSet := make(map[string]struct{}, len(persistedRequests))
	// Keep the dumps in persisted order so that the reported request is
	// deterministic instead of depending on map iteration order.
	persistedDumps := make([]string, 0, len(persistedRequests))
	for _, request := range persistedRequests {
		data, err := json.Marshal(request)
		if err != nil {
			return err
		}
		persistedRequestSet[string(data)] = struct{}{}
		persistedDumps = append(persistedDumps, string(data))
	}

	clientRequests := map[string]struct{}{}
	for _, r := range reports {
		for _, op := range r.KeyValue {
			request := op.Input.(model.EtcdRequest)
			data, err := json.Marshal(request)
			if err != nil {
				return err
			}
			clientRequests[string(data)] = struct{}{}
		}
	}

	for i, requestDump := range persistedDumps {
		// We cannot validate if persisted leaseGrant was sent by client as failed leaseGrant will not return LeaseID to clients.
		if persistedRequests[i].Type == model.LeaseGrant {
			continue
		}
		if _, found := clientRequests[requestDump]; !found {
			return fmt.Errorf("request %+v was not sent by client, required to validate", requestDump)
		}
	}

	var firstOp, lastOp porcupine.Operation
	// Track presence explicitly rather than treating Call == 0 as "unset",
	// so a zero call time is handled like any other timestamp.
	hasWrite := false
	for _, r := range reports {
		for _, op := range r.KeyValue {
			request := op.Input.(model.EtcdRequest)
			response := op.Output.(model.MaybeEtcdResponse)
			if response.Error != "" || request.IsRead() {
				continue
			}
			if !hasWrite || op.Call < firstOp.Call {
				firstOp = op
			}
			if !hasWrite || op.Call > lastOp.Call {
				lastOp = op
			}
			hasWrite = true
		}
	}
	// Without any successful write firstOp.Input is nil and the type
	// assertions below would panic; report a broken assumption instead.
	if !hasWrite {
		return fmt.Errorf("no successful client write found, required to validate")
	}

	firstOpData, err := json.Marshal(firstOp.Input.(model.EtcdRequest))
	if err != nil {
		return err
	}
	if _, found := persistedRequestSet[string(firstOpData)]; !found {
		return fmt.Errorf("first succesful client write %s was not persisted, required to validate", firstOpData)
	}

	lastOpData, err := json.Marshal(lastOp.Input.(model.EtcdRequest))
	if err != nil {
		return err
	}
	if _, found := persistedRequestSet[string(lastOpData)]; !found {
		return fmt.Errorf("last succesful client write %s was not persisted, required to validate", lastOpData)
	}
	return nil
}
// validateNonConcurrentClientRequests walks every reported operation and
// checks, per client ID, that each call starts strictly after the previously
// seen operation of that client returned, and that every operation returns
// strictly after it was called. The first violation is returned as an error;
// both properties are required for operation linearization.
func validateNonConcurrentClientRequests(reports []report.ClientReport) error {
	previousReturn := make(map[int]int64)
	for _, clientReport := range reports {
		for _, operation := range clientReport.KeyValue {
			id := operation.ClientId
			switch {
			case operation.Call <= previousReturn[id]:
				return fmt.Errorf("client %d has concurrent request, required for operation linearization", id)
			case operation.Return <= operation.Call:
				return fmt.Errorf("operation %v ends before it starts, required for operation linearization", operation)
			}
			previousReturn[id] = operation.Return
		}
	}
	return nil
}