[NOD-484] Fix deadlock between p2p server and sync manager during shutdown (#508)

* [NOD-484] Fix deadlock between p2p server and sync manager during shutdown.

* [NOD-484] Fix quitWaitGroup.Wait() potentially not waiting in some scenarios.

* [NOD-484] Add a comment explaining quitWaitGroup.

* [NOD-484] Fix typo.

* [NOD-484] Add etc to comment.
This commit is contained in:
stasatdaglabs 2019-12-02 18:08:32 +02:00 committed by Svarog
parent 3ab861227d
commit bb7d68deda

View File

@ -277,12 +277,17 @@ type Server struct {
relayInv chan relayMsg relayInv chan relayMsg
broadcast chan broadcastMsg broadcast chan broadcastMsg
wg sync.WaitGroup wg sync.WaitGroup
quit chan struct{}
nat serverutils.NAT nat serverutils.NAT
db database.DB db database.DB
TimeSource blockdag.MedianTimeSource TimeSource blockdag.MedianTimeSource
services wire.ServiceFlag services wire.ServiceFlag
// We add to quitWaitGroup before every instance in which we wait for
// the quit channel so that all those instances finish before we shut
// down the managers (connManager, addrManager, etc.).
quitWaitGroup sync.WaitGroup
quit chan struct{}
// The following fields are used for optional indexes. They will be nil // The following fields are used for optional indexes. They will be nil
// if the associated index is not enabled. These fields are set during // if the associated index is not enabled. These fields are set during
// initial creation of the server and never changed afterwards, so they // initial creation of the server and never changed afterwards, so they
@ -1167,6 +1172,8 @@ func (s *Server) peerHandler() {
s.addrManager.Start() s.addrManager.Start()
s.SyncManager.Start() s.SyncManager.Start()
s.quitWaitGroup.Add(1)
srvrLog.Tracef("Starting peer handler") srvrLog.Tracef("Starting peer handler")
state := &peerState{ state := &peerState{
@ -1232,6 +1239,7 @@ out:
sp.Disconnect() sp.Disconnect()
return true return true
}) })
s.quitWaitGroup.Done()
break out break out
case opcMsg := <-s.newOutboundConnection: case opcMsg := <-s.newOutboundConnection:
@ -1239,6 +1247,10 @@ out:
} }
} }
// Wait for all p2p server quit jobs to finish before stopping the
// various managers.
s.quitWaitGroup.Wait()
s.connManager.Stop() s.connManager.Stop()
s.SyncManager.Stop() s.SyncManager.Stop()
s.addrManager.Stop() s.addrManager.Stop()
@ -1341,6 +1353,8 @@ func (s *Server) rebroadcastHandler() {
timer := time.NewTimer(5 * time.Minute) timer := time.NewTimer(5 * time.Minute)
pendingInvs := make(map[wire.InvVect]interface{}) pendingInvs := make(map[wire.InvVect]interface{})
s.quitWaitGroup.Add(1)
out: out:
for { for {
select { select {
@ -1388,6 +1402,7 @@ cleanup:
break cleanup break cleanup
} }
} }
s.quitWaitGroup.Done()
s.wg.Done() s.wg.Done()
} }
@ -1525,6 +1540,9 @@ func (s *Server) upnpUpdateThread() {
timer := time.NewTimer(0 * time.Second) timer := time.NewTimer(0 * time.Second)
lport, _ := strconv.ParseInt(config.ActiveConfig().NetParams().DefaultPort, 10, 16) lport, _ := strconv.ParseInt(config.ActiveConfig().NetParams().DefaultPort, 10, 16)
first := true first := true
s.quitWaitGroup.Add(1)
out: out:
for { for {
select { select {
@ -1570,6 +1588,7 @@ out:
srvrLog.Debugf("successfully disestablished UPnP port mapping") srvrLog.Debugf("successfully disestablished UPnP port mapping")
} }
s.quitWaitGroup.Done()
s.wg.Done() s.wg.Done()
} }