From dabf6978cc1991ad6ed606b07e168a8979150df8 Mon Sep 17 00:00:00 2001 From: ahrtr Date: Sun, 29 May 2022 15:08:59 +0800 Subject: [PATCH] Fix potential deadlock between Revoke and (Grant or Checkpoint) --- server/lease/lessor.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/server/lease/lessor.go b/server/lease/lessor.go index d1599d60e..9ed719f18 100644 --- a/server/lease/lessor.go +++ b/server/lease/lessor.go @@ -286,6 +286,10 @@ func (le *lessor) Grant(id LeaseID, ttl int64) (*Lease, error) { revokec: make(chan struct{}), } + if l.ttl < le.minLeaseTTL { + l.ttl = le.minLeaseTTL + } + le.mu.Lock() defer le.mu.Unlock() @@ -293,10 +297,6 @@ func (le *lessor) Grant(id LeaseID, ttl int64) (*Lease, error) { return nil, ErrLeaseExists } - if l.ttl < le.minLeaseTTL { - l.ttl = le.minLeaseTTL - } - if le.isPrimary() { l.refresh(0) } else { @@ -326,6 +326,12 @@ func (le *lessor) Revoke(id LeaseID) error { le.mu.Unlock() return ErrLeaseNotFound } + + // We shouldn't delete the lease inside the transaction lock, otherwise + // it may lead to deadlock with Grant or Checkpoint operations, which + // acquire the le.mu firstly and then the batchTx lock. + delete(le.leaseMap, id) + defer close(l.revokec) // unlock before doing external work le.mu.Unlock() @@ -344,9 +350,6 @@ func (le *lessor) Revoke(id LeaseID) error { txn.DeleteRange([]byte(key), nil) } - le.mu.Lock() - defer le.mu.Unlock() - delete(le.leaseMap, l.ID) // lease deletion needs to be in the same backend transaction with the // kv deletion. Or we might end up with not executing the revoke or not // deleting the keys if etcdserver fails in between.