From d69adf45f9e95661966204460f019b8164aaef12 Mon Sep 17 00:00:00 2001 From: Ivan Valdes Date: Tue, 13 Feb 2024 13:24:44 -0800 Subject: [PATCH 1/2] server: Implement WithMmapSize option for backend config Accept a third argument for NewDefaultBackend for overrides to the BackendConfig. Add a new function, WithMmapSize, which modifies the backend config to provide a custom InitialMmapSize. Signed-off-by: Ivan Valdes --- server/storage/backend/backend.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index 7aa4f8469..4b2a49cec 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -151,6 +151,8 @@ type BackendConfig struct { Hooks Hooks } +type BackendConfigOption func(*BackendConfig) + func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, @@ -164,9 +166,19 @@ func New(bcfg BackendConfig) Backend { return newBackend(bcfg) } -func NewDefaultBackend(lg *zap.Logger, path string) Backend { +func WithMmapSize(size uint64) BackendConfigOption { + return func(bcfg *BackendConfig) { + bcfg.MmapSize = size + } +} + +func NewDefaultBackend(lg *zap.Logger, path string, opts ...BackendConfigOption) Backend { bcfg := DefaultBackendConfig(lg) bcfg.Path = path + for _, opt := range opts { + opt(&bcfg) + } + return newBackend(bcfg) } From be2883321240340479f9e334bc4a3b2959ad7639 Mon Sep 17 00:00:00 2001 From: Ivan Valdes Date: Tue, 13 Feb 2024 13:30:30 -0800 Subject: [PATCH 2/2] etcdutl: Fix snapshot restore memory alloc issue When running the snapshot command, allow receiving an initial memory map allocation for the database, avoiding future memory allocation issues. 
Co-authored-by: Benjamin Wang Co-authored-by: Fatih USTA Signed-off-by: Ivan Valdes --- etcdutl/etcdutl/snapshot_command.go | 7 ++++++- etcdutl/snapshot/v3_snapshot.go | 15 +++++++++++---- server/storage/backend/backend.go | 6 +++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/etcdutl/etcdutl/snapshot_command.go b/etcdutl/etcdutl/snapshot_command.go index 9e557495e..eeffb93bb 100644 --- a/etcdutl/etcdutl/snapshot_command.go +++ b/etcdutl/etcdutl/snapshot_command.go @@ -22,6 +22,7 @@ import ( "go.etcd.io/etcd/etcdutl/v3/snapshot" "go.etcd.io/etcd/pkg/v3/cobrautl" + "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/datadir" ) @@ -38,6 +39,7 @@ var ( restorePeerURLs string restoreName string skipHashCheck bool + initialMmapSize = backend.InitialMmapSize markCompacted bool revisionBump uint64 ) @@ -77,6 +79,7 @@ func NewSnapshotRestoreCommand() *cobra.Command { cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster") cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member") cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory)") + cmd.Flags().Uint64Var(&initialMmapSize, "initial-memory-map-size", initialMmapSize, "Initial memory map size of the database in bytes. 
It uses the default value if not defined or defined to 0") cmd.Flags().Uint64Var(&revisionBump, "bump-revision", 0, "How much to increase the latest revision after restore") cmd.Flags().BoolVar(&markCompacted, "mark-compacted", false, "Mark the latest revision after restore as the point of scheduled compaction (required if --bump-revision > 0, disallowed otherwise)") @@ -104,7 +107,7 @@ func SnapshotStatusCommandFunc(cmd *cobra.Command, args []string) { func snapshotRestoreCommandFunc(_ *cobra.Command, args []string) { SnapshotRestoreCommandFunc(restoreCluster, restoreClusterToken, restoreDataDir, restoreWalDir, - restorePeerURLs, restoreName, skipHashCheck, revisionBump, markCompacted, args) + restorePeerURLs, restoreName, skipHashCheck, initialMmapSize, revisionBump, markCompacted, args) } func SnapshotRestoreCommandFunc(restoreCluster string, @@ -114,6 +117,7 @@ func SnapshotRestoreCommandFunc(restoreCluster string, restorePeerURLs string, restoreName string, skipHashCheck bool, + initialMmapSize uint64, revisionBump uint64, markCompacted bool, args []string) { @@ -149,6 +153,7 @@ func SnapshotRestoreCommandFunc(restoreCluster string, InitialCluster: restoreCluster, InitialClusterToken: restoreClusterToken, SkipHashCheck: skipHashCheck, + InitialMmapSize: initialMmapSize, RevisionBump: revisionBump, MarkCompacted: markCompacted, }); err != nil { diff --git a/etcdutl/snapshot/v3_snapshot.go b/etcdutl/snapshot/v3_snapshot.go index 64d16acd7..2391a1566 100644 --- a/etcdutl/snapshot/v3_snapshot.go +++ b/etcdutl/snapshot/v3_snapshot.go @@ -83,7 +83,8 @@ type v3Manager struct { snapDir string cl *membership.RaftCluster - skipHashCheck bool + skipHashCheck bool + initialMmapSize uint64 } // hasChecksum returns "true" if the file size "n" @@ -204,6 +205,9 @@ type RestoreConfig struct { // (required if copied from data directory). SkipHashCheck bool + // InitialMmapSize is the database initial memory map size. 
+ InitialMmapSize uint64 + // RevisionBump is the amount to increase the latest revision after restore, // to allow administrators to trick clients into thinking that revision never decreased. // If 0, revision bumping is skipped. @@ -263,6 +267,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { s.walDir = walDir s.snapDir = filepath.Join(dataDir, "member", "snap") s.skipHashCheck = cfg.SkipHashCheck + s.initialMmapSize = cfg.InitialMmapSize s.lg.Info( "restoring snapshot", @@ -270,6 +275,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { zap.String("wal-dir", s.walDir), zap.String("data-dir", dataDir), zap.String("snap-dir", s.snapDir), + zap.Uint64("initial-memory-map-size", s.initialMmapSize), ) if err = s.saveDB(); err != nil { @@ -297,6 +303,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { zap.String("wal-dir", s.walDir), zap.String("data-dir", dataDir), zap.String("snap-dir", s.snapDir), + zap.Uint64("initial-memory-map-size", s.initialMmapSize), ) return verify.VerifyIfEnabled(verify.Config{ @@ -317,7 +324,7 @@ func (s *v3Manager) saveDB() error { return err } - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() err = schema.NewMembershipBackend(s.lg, be).TrimMembershipFromBackend() @@ -472,7 +479,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { } // add members again to persist them to the backend we create. 
- be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() s.cl.SetBackend(schema.NewMembershipBackend(s.lg, be)) for _, m := range s.cl.Members() { @@ -551,7 +558,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { } func (s *v3Manager) updateCIndex(commit uint64, term uint64) error { - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() cindex.UpdateConsistentIndexForce(be.BatchTx(), commit, term) diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index 4b2a49cec..28bafe4ed 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -36,10 +36,10 @@ var ( defragLimit = 10000 - // initialMmapSize is the initial size of the mmapped region. Setting this larger than + // InitialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. // This only works for linux. - initialMmapSize = uint64(10 * 1024 * 1024 * 1024) + InitialMmapSize = uint64(10 * 1024 * 1024 * 1024) // minSnapshotWarningTimeout is the minimum threshold to trigger a long running snapshot warning. minSnapshotWarningTimeout = 30 * time.Second @@ -157,7 +157,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, BatchLimit: defaultBatchLimit, - MmapSize: initialMmapSize, + MmapSize: InitialMmapSize, Logger: lg, } }