mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #11613 from jpbetz/fix-defrag-orphan-file
mvcc/backend: Fix corruption bug in defrag
This commit is contained in:
@@ -88,6 +88,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
|
||||
- `etcd --experimental-backend-bbolt-freelist-type` has been deprecated.
|
||||
- Support [rollback/downgrade](TODO).
|
||||
- Deprecate v2 apply on cluster version. [Use v3 request to set cluster version and recover cluster version from v3 backend](https://github.com/etcd-io/etcd/pull/11427).
|
||||
- [Fix corruption bug in defrag](https://github.com/etcd-io/etcd/pull/11613).
|
||||
|
||||
### Package `embed`
|
||||
|
||||
|
||||
@@ -206,6 +206,9 @@ func (s *Snapshotter) snapNames() ([]string, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = s.cleanupSnapdir(names); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
snaps := checkSuffix(s.lg, names)
|
||||
if len(snaps) == 0 {
|
||||
return nil, ErrNoSnapshot
|
||||
@@ -231,3 +234,19 @@ func checkSuffix(lg *zap.Logger, names []string) []string {
|
||||
}
|
||||
return snaps
|
||||
}
|
||||
|
||||
// cleanupSnapdir removes any files that should not be in the snapshot directory:
|
||||
// - db.tmp prefixed files that can be orphaned by defragmentation
|
||||
func (s *Snapshotter) cleanupSnapdir(filenames []string) error {
|
||||
for _, filename := range filenames {
|
||||
if strings.HasPrefix(filename, "db.tmp") {
|
||||
if s.lg != nil {
|
||||
s.lg.Info("found orphaned defragmentation file; deleting", zap.String("path", filename))
|
||||
if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) {
|
||||
return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -358,13 +358,24 @@ func (b *backend) defrag() error {
|
||||
|
||||
b.batchTx.tx = nil
|
||||
|
||||
tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions)
|
||||
// Create a temporary file to ensure we start with a clean slate.
|
||||
// Snapshotter.cleanupSnapdir cleans up any of these that are found during startup.
|
||||
dir := filepath.Dir(b.db.Path())
|
||||
temp, err := ioutil.TempFile(dir, "db.tmp.*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
options := *boltOpenOptions
|
||||
options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) {
|
||||
return temp, nil
|
||||
}
|
||||
tdbp := temp.Name()
|
||||
tmpdb, err := bolt.Open(tdbp, 0600, &options)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dbp := b.db.Path()
|
||||
tdbp := tmpdb.Path()
|
||||
size1, sizeInUse1 := b.Size(), b.SizeInUse()
|
||||
if b.lg != nil {
|
||||
b.lg.Info(
|
||||
@@ -376,12 +387,12 @@ func (b *backend) defrag() error {
|
||||
zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))),
|
||||
)
|
||||
}
|
||||
|
||||
// gofail: var defragBeforeCopy struct{}
|
||||
err = defragdb(b.db, tmpdb, defragLimit)
|
||||
if err != nil {
|
||||
tmpdb.Close()
|
||||
if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
|
||||
b.lg.Error("failed to remove dirs under tmpdb", zap.Error(rmErr))
|
||||
b.lg.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr))
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -394,6 +405,7 @@ func (b *backend) defrag() error {
|
||||
if err != nil {
|
||||
b.lg.Fatal("failed to close tmp database", zap.Error(err))
|
||||
}
|
||||
// gofail: var defragBeforeRename struct{}
|
||||
err = os.Rename(tdbp, dbp)
|
||||
if err != nil {
|
||||
b.lg.Fatal("failed to rename tmp database", zap.Error(err))
|
||||
|
||||
Reference in New Issue
Block a user