Merge pull request #17705 from fuweid/34-cp-17462

[3.4] *: LeaseTimeToLive returns error if leader changed
This commit is contained in:
Benjamin Wang 2024-04-05 09:01:00 +01:00 committed by GitHub
commit bb903a36ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 127 additions and 4 deletions

View File

@ -38,13 +38,16 @@ jobs:
GOARCH=amd64 PASSES='fmt bom dep' ./test
;;
linux-amd64-integration-1-cpu)
GOARCH=amd64 CPU=1 RACE='false' make test-integration
make install-gofail
GOARCH=amd64 CPU=1 RACE='false' FAILPOINTS='true' make test-integration
;;
linux-amd64-integration-2-cpu)
GOARCH=amd64 CPU=2 RACE='false' make test-integration
make install-gofail
GOARCH=amd64 CPU=2 RACE='false' FAILPOINTS='true' make test-integration
;;
linux-amd64-integration-4-cpu)
GOARCH=amd64 CPU=4 RACE='false' make test-integration
make install-gofail
GOARCH=amd64 CPU=4 RACE='false' FAILPOINTS='true' make test-integration
;;
linux-amd64-functional)
./build && GOARCH=amd64 PASSES='functional' ./test

View File

@ -538,3 +538,12 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g
.PHONY: install-gofail
install-gofail:
go install go.etcd.io/gofail@${GOFAIL_VERSION}
.PHONY: gofail-enable
gofail-enable: install-gofail
PASSES="toggle_failpoints" FAILPOINTS=true ./test
.PHONY: gofail-disable
gofail-disable: install-gofail
PASSES="toggle_failpoints" ./test

2
build
View File

@ -21,7 +21,7 @@ GOFAIL_VERSION=$(cd tools/mod && go list -m -f "{{.Version}}" go.etcd.io/gofail)
toggle_failpoints() {
mode="$1"
if command -v gofail >/dev/null 2>&1; then
gofail "$mode" etcdserver/ mvcc/ mvcc/backend/ wal/
gofail "$mode" etcdserver/ lease/leasehttp/ mvcc/ mvcc/backend/ wal/
# shellcheck disable=SC2086
if [[ "$mode" == "enable" ]]; then
go get go.etcd.io/gofail@${GOFAIL_VERSION}

View File

@ -363,6 +363,9 @@ func (s *EtcdServer) leaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
if err := s.waitAppliedIndex(); err != nil {
return nil, err
}
// gofail: var beforeLookupWhenLeaseTimeToLive struct{}
// primary; timetolive directly from leader
le := s.lessor.Lookup(lease.LeaseID(r.ID))
if le == nil {
@ -378,6 +381,15 @@ func (s *EtcdServer) leaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
}
resp.Keys = kbs
}
// The leasor could be demoted if leader changed during lookup.
// We should return error to force retry instead of returning
// incorrect remaining TTL.
if le.Demoted() {
// NOTE: lease.ErrNotPrimary is not retryable error for
// client. Instead, uses ErrLeaderChanged.
return nil, ErrLeaderChanged
}
return resp, nil
}

1
go.mod
View File

@ -33,6 +33,7 @@ require (
github.com/urfave/cli v1.20.0
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2
go.etcd.io/bbolt v1.3.9
go.etcd.io/gofail v0.1.0
go.uber.org/zap v1.10.0
golang.org/x/crypto v0.21.0
golang.org/x/net v0.23.0

2
go.sum
View File

@ -177,6 +177,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI=
go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.uber.org/atomic v1.3.2 h1:2Oa65PReHzfn29GpvgsYwloV9AVFHPDk8tYxt2c2tr4=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=

View File

@ -21,10 +21,14 @@ import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
"go.etcd.io/etcd/mvcc/mvccpb"
"go.etcd.io/etcd/pkg/testutil"
gofail "go.etcd.io/gofail/runtime"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
@ -1021,6 +1025,76 @@ func TestV3LeaseRecoverKeyWithMutipleLease(t *testing.T) {
}
}
func TestV3LeaseTimeToLiveWithLeaderChanged(t *testing.T) {
t.Run("normal", func(subT *testing.T) {
testV3LeaseTimeToLiveWithLeaderChanged(subT, "beforeLookupWhenLeaseTimeToLive")
})
t.Run("forward", func(subT *testing.T) {
testV3LeaseTimeToLiveWithLeaderChanged(subT, "beforeLookupWhenForwardLeaseTimeToLive")
})
}
func testV3LeaseTimeToLiveWithLeaderChanged(t *testing.T, fpName string) {
if len(gofail.List()) == 0 {
t.Skip("please run 'make gofail-enable' before running the test")
}
clus := NewClusterV3(t, &ClusterConfig{Size: 3})
defer clus.Terminate(t)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
oldLeadIdx := clus.WaitLeader(t)
followerIdx := (oldLeadIdx + 1) % 3
followerMemberID := clus.Members[followerIdx].ID()
oldLeadC := clus.Client(oldLeadIdx)
leaseResp, err := oldLeadC.Grant(ctx, 100)
require.NoError(t, err)
require.NoError(t, gofail.Enable(fpName, `sleep("3s")`))
t.Cleanup(func() {
terr := gofail.Disable(fpName)
if terr != nil && terr != gofail.ErrDisabled {
t.Fatalf("failed to disable %s: %v", fpName, terr)
}
})
readyCh := make(chan struct{})
errCh := make(chan error, 1)
var targetC *clientv3.Client
switch fpName {
case "beforeLookupWhenLeaseTimeToLive":
targetC = oldLeadC
case "beforeLookupWhenForwardLeaseTimeToLive":
targetC = clus.Client((oldLeadIdx + 2) % 3)
default:
t.Fatalf("unsupported %s failpoint", fpName)
}
go func() {
<-readyCh
time.Sleep(1 * time.Second)
_, merr := oldLeadC.MoveLeader(ctx, uint64(followerMemberID))
assert.NoError(t, gofail.Disable(fpName))
errCh <- merr
}()
close(readyCh)
ttlResp, err := targetC.TimeToLive(ctx, leaseResp.ID)
require.NoError(t, err)
require.GreaterOrEqual(t, int64(100), ttlResp.TTL)
require.NoError(t, <-errCh)
}
// acquireLeaseAndKey creates a new lease and creates an attached key.
func acquireLeaseAndKey(clus *ClusterV3, key string) (int64, error) {
// create lease

View File

@ -103,6 +103,9 @@ func (h *leaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
http.Error(w, ErrLeaseHTTPTimeout.Error(), http.StatusRequestTimeout)
return
}
// gofail: var beforeLookupWhenForwardLeaseTimeToLive struct{}
l := h.l.Lookup(lease.LeaseID(lreq.LeaseTimeToLiveRequest.ID))
if l == nil {
http.Error(w, lease.ErrLeaseNotFound.Error(), http.StatusNotFound)
@ -126,6 +129,14 @@ func (h *leaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
resp.LeaseTimeToLiveResponse.Keys = kbs
}
// The leasor could be demoted if leader changed during lookup.
// We should return error to force retry instead of returning
// incorrect remaining TTL.
if l.Demoted() {
http.Error(w, lease.ErrNotPrimary.Error(), http.StatusInternalServerError)
return
}
v, err = resp.Marshal()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)

View File

@ -901,6 +901,13 @@ func (l *Lease) forever() {
l.expiry = forever
}
// Demoted returns true if the lease's expiry has been reset to forever.
func (l *Lease) Demoted() bool {
l.expiryMu.Lock()
defer l.expiryMu.Unlock()
return l.expiry == forever
}
// Keys returns all the keys attached to the lease.
func (l *Lease) Keys() []string {
l.mu.RLock()

4
test
View File

@ -690,6 +690,10 @@ function build_cov_pass {
go test -tags cov -c -covermode=set -coverpkg="$PKGS_COMMA" -o "${out}/etcdctl_test" "${REPO_PATH}/etcdctl"
}
function toggle_failpoints_pass {
toggle_failpoints_default
}
# fail fast on static tests
function build_pass {
echo "Checking build..."