Merge pull request #17864 from fuweid/backport-17815-34

[3.4] fix revision loss issue caused by compaction - 17780
Benjamin Wang 2024-04-25 14:26:14 +01:00 committed by GitHub
commit 1d02c16e2a
4 changed files with 130 additions and 0 deletions


@@ -450,6 +450,17 @@ func (s *store) restore() error {
		s.currentRev = s.compactMainRev
	}

	// If the latest revision was a tombstone revision and etcd just compacted
	// it, but crashed right before persisting the FinishedCompactRevision,
	// then it would lead to the revision decreasing in the bbolt db file. In
	// such a scenario, we should adjust the current revision using the
	// scheduled compact revision on bootstrap when etcd gets started again.
	//
	// See https://github.com/etcd-io/etcd/issues/17780#issuecomment-2061900231
	if s.currentRev < scheduledCompact {
		s.currentRev = scheduledCompact
	}

	if scheduledCompact <= s.compactMainRev {
		scheduledCompact = 0
	}
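For intuition, a minimal standalone sketch (not etcd code; the function name and both parameters are hypothetical) of the invariant this hunk restores on bootstrap: the revision recovered by scanning the key bucket must never end up below the persisted scheduled compact revision, because the tombstone carrying the latest revision may already have been compacted away before finishedCompactRev was written.

// adjustRestoredRev is a sketch only: restoredRev stands for the revision
// recovered from the key bucket, scheduledCompact for the value of the
// scheduled-compact meta key read during restore.
func adjustRestoredRev(restoredRev, scheduledCompact int64) int64 {
	if restoredRev < scheduledCompact {
		// The latest tombstone was already compacted away, but the crash
		// happened before finishedCompactRev was persisted, so the key
		// bucket alone under-reports the current revision.
		return scheduledCompact
	}
	return restoredRev
}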


@@ -49,6 +49,7 @@ func (s *store) scheduleCompaction(compactMainRev int64, keep map[revision]struc
		}
		if len(keys) < s.cfg.CompactionBatchLimit {
			// gofail: var compactBeforeSetFinishedCompact struct{}
			rbytes := make([]byte, 8+1+8)
			revToBytes(revision{main: compactMainRev}, rbytes)
			tx.UnsafePut(metaBucketName, finishedCompactKeyName, rbytes)
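The gofail comment above only becomes a real failpoint when the binary is built with failpoints enabled; at runtime it is armed through gofail's HTTP endpoint, which is what clus.procs[targetIdx].Failpoints().SetupHTTP(...) does in the new test. A rough manual equivalent, assuming the target member was started with GOFAIL_HTTP="127.0.0.1:22381" (that address is an assumption, not part of this change):

package main

import (
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// PUT /<failpoint-name> with the gofail term as the body; "panic" makes
	// scheduleCompaction crash right before finishedCompactRev is persisted.
	req, err := http.NewRequest(http.MethodPut,
		"http://127.0.0.1:22381/compactBeforeSetFinishedCompact",
		strings.NewReader("panic"))
	if err != nil {
		panic(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("failpoint armed:", resp.Status)
}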


@@ -140,6 +140,7 @@ type etcdProcessClusterConfig struct {
	MaxConcurrentStreams       uint32 // default is math.MaxUint32
	WatchProcessNotifyInterval time.Duration
	CompactionBatchLimit       int

	debug bool

@@ -333,6 +334,9 @@ func (cfg *etcdProcessClusterConfig) etcdServerProcessConfigs() []*etcdServerPro
	if cfg.WatchProcessNotifyInterval != 0 {
		args = append(args, "--experimental-watch-progress-notify-interval", cfg.WatchProcessNotifyInterval.String())
	}
	if cfg.CompactionBatchLimit != 0 {
		args = append(args, "--experimental-compaction-batch-limit", fmt.Sprintf("%d", cfg.CompactionBatchLimit))
	}
	if cfg.debug {
		args = append(args, "--debug")


@@ -0,0 +1,114 @@
// Copyright 2024 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/pkg/stringutil"
	"go.etcd.io/etcd/pkg/testutil"
)

// TestReproduce17780 reproduces the issue: https://github.com/etcd-io/etcd/issues/17780.
func TestReproduce17780(t *testing.T) {
	defer testutil.AfterTest(t)

	compactionBatchLimit := 10

	ctx := context.TODO()
	clus, cerr := newEtcdProcessCluster(t, &etcdProcessClusterConfig{
		clusterSize:                3,
		goFailEnabled:              true,
		goFailClientTimeout:        40 * time.Second,
		snapshotCount:              1000,
		CompactionBatchLimit:       compactionBatchLimit,
		WatchProcessNotifyInterval: 100 * time.Millisecond,
	})
	require.NoError(t, cerr)
	t.Cleanup(func() { require.NoError(t, clus.Stop()) })

	leaderIdx := clus.WaitLeader(t)
	targetIdx := (leaderIdx + 1) % clus.cfg.clusterSize

	cli := newClient(t, clus.procs[targetIdx].EndpointsGRPC(), clientNonTLS, false)

	// Revision: 2 -> 8 for new keys
	n := compactionBatchLimit - 2
	valueSize := 16
	for i := 2; i <= n; i++ {
		_, err := cli.Put(ctx, fmt.Sprintf("%d", i), stringutil.RandString(uint(valueSize)))
		require.NoError(t, err)
	}

	// Revision: 9 -> 11 for deleting keys
	//
	// The last compaction batch must be a no-op, with all the tombstones
	// removed by the previous batch, so that only the finishedCompactRev
	// update is lost after the panic.
	for i := 9; i <= compactionBatchLimit+1; i++ {
		rev := i - 5
		key := fmt.Sprintf("%d", rev)

		_, err := cli.Delete(ctx, key)
		require.NoError(t, err)
	}

	require.NoError(t, clus.procs[targetIdx].Failpoints().SetupHTTP(ctx, "compactBeforeSetFinishedCompact", `panic`))

	_, err := cli.Compact(ctx, 11, clientv3.WithCompactPhysical())
	require.Error(t, err)

	require.Error(t, clus.procs[targetIdx].Stop())
	// NOTE: The proc panics and exits with code 2. Restart() refuses to start
	// a new process while the last recorded exit code is 2, so IsRunning() is
	// called here to clean up the recorded status before restarting.
	require.False(t, clus.procs[targetIdx].IsRunning())
	require.NoError(t, clus.procs[targetIdx].Restart())

	// NOTE: The revision must not decrease even though there is no record
	// of a finished compact operation.
	resp, err := cli.Get(ctx, fmt.Sprintf("%d", n))
	require.NoError(t, err)
	assert.GreaterOrEqual(t, resp.Header.Revision, int64(11))

	// Revision 4 should be deleted by compaction.
	resp, err = cli.Get(ctx, fmt.Sprintf("%d", 4))
	require.NoError(t, err)
	require.True(t, resp.Count == 0)

	next := 20
	for i := 12; i <= next; i++ {
		_, err := cli.Put(ctx, fmt.Sprintf("%d", i), stringutil.RandString(uint(valueSize)))
		require.NoError(t, err)
	}

	expectedRevision := next
	for procIdx, proc := range clus.procs {
		cli = newClient(t, proc.EndpointsGRPC(), clientNonTLS, false)
		resp, err := cli.Get(ctx, fmt.Sprintf("%d", next))
		require.NoError(t, err)

		assert.GreaterOrEqual(t, resp.Header.Revision, int64(expectedRevision),
			fmt.Sprintf("LeaderIdx: %d, Current: %d", leaderIdx, procIdx))
	}
}