mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #675 from unihorn/56
fix(peer_server): exit all server goroutines in Stop()
This commit is contained in:
commit
d78116c35b
@ -33,7 +33,7 @@ func init() {
|
|||||||
defaultDiscoverer = &Discoverer{}
|
defaultDiscoverer = &Discoverer{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []string, err error) {
|
func (d *Discoverer) Do(discoveryURL string, name string, peer string, closeChan <-chan bool, startRoutine func(func())) (peers []string, err error) {
|
||||||
d.name = name
|
d.name = name
|
||||||
d.peer = peer
|
d.peer = peer
|
||||||
d.discoveryURL = discoveryURL
|
d.discoveryURL = discoveryURL
|
||||||
@ -68,7 +68,7 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
|
|||||||
|
|
||||||
// Start the very slow heartbeat to the cluster now in anticipation
|
// Start the very slow heartbeat to the cluster now in anticipation
|
||||||
// that everything is going to go alright now
|
// that everything is going to go alright now
|
||||||
go d.startHeartbeat()
|
startRoutine(func() { d.startHeartbeat(closeChan) })
|
||||||
|
|
||||||
// Attempt to take the leadership role, if there is no error we are it!
|
// Attempt to take the leadership role, if there is no error we are it!
|
||||||
resp, err := d.client.Create(path.Join(d.prefix, stateKey), startedState, 0)
|
resp, err := d.client.Create(path.Join(d.prefix, stateKey), startedState, 0)
|
||||||
@ -120,17 +120,20 @@ func (d *Discoverer) findPeers() (peers []string, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Discoverer) startHeartbeat() {
|
func (d *Discoverer) startHeartbeat(closeChan <-chan bool) {
|
||||||
// In case of errors we should attempt to heartbeat fairly frequently
|
// In case of errors we should attempt to heartbeat fairly frequently
|
||||||
heartbeatInterval := defaultTTL / 8
|
heartbeatInterval := defaultTTL / 8
|
||||||
ticker := time.Tick(time.Second * time.Duration(heartbeatInterval))
|
ticker := time.NewTicker(time.Second * time.Duration(heartbeatInterval))
|
||||||
|
defer ticker.Stop()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ticker:
|
case <-ticker.C:
|
||||||
err := d.heartbeat()
|
err := d.heartbeat()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Discovery heartbeat failed: %v", err)
|
log.Warnf("Discovery heartbeat failed: %v", err)
|
||||||
}
|
}
|
||||||
|
case <-closeChan:
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,6 +143,6 @@ func (d *Discoverer) heartbeat() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func Do(discoveryURL string, name string, peer string) ([]string, error) {
|
func Do(discoveryURL string, name string, peer string, closeChan <-chan bool, startRoutine func(func())) ([]string, error) {
|
||||||
return defaultDiscoverer.Do(discoveryURL, name, peer)
|
return defaultDiscoverer.Do(discoveryURL, name, peer, closeChan, startRoutine)
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,7 @@ type PeerServer struct {
|
|||||||
mode Mode
|
mode Mode
|
||||||
|
|
||||||
closeChan chan bool
|
closeChan chan bool
|
||||||
|
routineGroup sync.WaitGroup
|
||||||
timeoutThresholdChan chan interface{}
|
timeoutThresholdChan chan interface{}
|
||||||
|
|
||||||
standbyPeerURL string
|
standbyPeerURL string
|
||||||
@ -293,14 +294,14 @@ func (s *PeerServer) Start(snapshot bool, discoverURL string, peers []string) er
|
|||||||
|
|
||||||
s.closeChan = make(chan bool)
|
s.closeChan = make(chan bool)
|
||||||
|
|
||||||
go s.monitorSync()
|
s.startRoutine(s.monitorSync)
|
||||||
go s.monitorTimeoutThreshold(s.closeChan)
|
s.startRoutine(s.monitorTimeoutThreshold)
|
||||||
go s.monitorActiveSize(s.closeChan)
|
s.startRoutine(s.monitorActiveSize)
|
||||||
go s.monitorPeerActivity(s.closeChan)
|
s.startRoutine(s.monitorPeerActivity)
|
||||||
|
|
||||||
// open the snapshot
|
// open the snapshot
|
||||||
if snapshot {
|
if snapshot {
|
||||||
go s.monitorSnapshot()
|
s.startRoutine(s.monitorSnapshot)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -312,9 +313,10 @@ func (s *PeerServer) Stop() {
|
|||||||
|
|
||||||
if s.closeChan != nil {
|
if s.closeChan != nil {
|
||||||
close(s.closeChan)
|
close(s.closeChan)
|
||||||
s.closeChan = nil
|
|
||||||
}
|
}
|
||||||
s.raftServer.Stop()
|
s.raftServer.Stop()
|
||||||
|
s.routineGroup.Wait()
|
||||||
|
s.closeChan = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PeerServer) HTTPHandler() http.Handler {
|
func (s *PeerServer) HTTPHandler() http.Handler {
|
||||||
@ -454,7 +456,7 @@ func (s *PeerServer) checkPeerAddressNonconflict() bool {
|
|||||||
|
|
||||||
// Helper function to do discovery and return results in expected format
|
// Helper function to do discovery and return results in expected format
|
||||||
func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) {
|
func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) {
|
||||||
peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL)
|
peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL, s.closeChan, s.startRoutine)
|
||||||
|
|
||||||
// Warn about errors coming from discovery, this isn't fatal
|
// Warn about errors coming from discovery, this isn't fatal
|
||||||
// since the user might have provided a peer list elsewhere,
|
// since the user might have provided a peer list elsewhere,
|
||||||
@ -698,9 +700,24 @@ func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *PeerServer) startRoutine(f func()) {
|
||||||
|
s.routineGroup.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer s.routineGroup.Done()
|
||||||
|
f()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *PeerServer) monitorSnapshot() {
|
func (s *PeerServer) monitorSnapshot() {
|
||||||
for {
|
for {
|
||||||
time.Sleep(s.snapConf.checkingInterval)
|
timer := time.NewTimer(s.snapConf.checkingInterval)
|
||||||
|
defer timer.Stop()
|
||||||
|
select {
|
||||||
|
case <-s.closeChan:
|
||||||
|
return
|
||||||
|
case <-timer.C:
|
||||||
|
}
|
||||||
|
|
||||||
currentIndex := s.RaftServer().CommitIndex()
|
currentIndex := s.RaftServer().CommitIndex()
|
||||||
count := currentIndex - s.snapConf.lastIndex
|
count := currentIndex - s.snapConf.lastIndex
|
||||||
if uint64(count) > s.snapConf.snapshotThr {
|
if uint64(count) > s.snapConf.snapshotThr {
|
||||||
@ -712,10 +729,13 @@ func (s *PeerServer) monitorSnapshot() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *PeerServer) monitorSync() {
|
func (s *PeerServer) monitorSync() {
|
||||||
ticker := time.Tick(time.Millisecond * 500)
|
ticker := time.NewTicker(time.Millisecond * 500)
|
||||||
|
defer ticker.Stop()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case now := <-ticker:
|
case <-s.closeChan:
|
||||||
|
return
|
||||||
|
case now := <-ticker.C:
|
||||||
if s.raftServer.State() == raft.Leader {
|
if s.raftServer.State() == raft.Leader {
|
||||||
s.raftServer.Do(s.store.CommandFactory().CreateSyncCommand(now))
|
s.raftServer.Do(s.store.CommandFactory().CreateSyncCommand(now))
|
||||||
}
|
}
|
||||||
@ -725,27 +745,35 @@ func (s *PeerServer) monitorSync() {
|
|||||||
|
|
||||||
// monitorTimeoutThreshold groups timeout threshold events together and prints
|
// monitorTimeoutThreshold groups timeout threshold events together and prints
|
||||||
// them as a single log line.
|
// them as a single log line.
|
||||||
func (s *PeerServer) monitorTimeoutThreshold(closeChan chan bool) {
|
func (s *PeerServer) monitorTimeoutThreshold() {
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
case <-s.closeChan:
|
||||||
|
return
|
||||||
case value := <-s.timeoutThresholdChan:
|
case value := <-s.timeoutThresholdChan:
|
||||||
log.Infof("%s: warning: heartbeat near election timeout: %v", s.Config.Name, value)
|
log.Infof("%s: warning: heartbeat near election timeout: %v", s.Config.Name, value)
|
||||||
case <-closeChan:
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(ThresholdMonitorTimeout)
|
timer := time.NewTimer(ThresholdMonitorTimeout)
|
||||||
|
defer timer.Stop()
|
||||||
|
select {
|
||||||
|
case <-s.closeChan:
|
||||||
|
return
|
||||||
|
case <-timer.C:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// monitorActiveSize has the leader periodically check the status of cluster
|
// monitorActiveSize has the leader periodically check the status of cluster
|
||||||
// nodes and swaps them out for standbys as needed.
|
// nodes and swaps them out for standbys as needed.
|
||||||
func (s *PeerServer) monitorActiveSize(closeChan chan bool) {
|
func (s *PeerServer) monitorActiveSize() {
|
||||||
for {
|
for {
|
||||||
|
timer := time.NewTimer(ActiveMonitorTimeout)
|
||||||
|
defer timer.Stop()
|
||||||
select {
|
select {
|
||||||
case <-time.After(ActiveMonitorTimeout):
|
case <-s.closeChan:
|
||||||
case <-closeChan:
|
|
||||||
return
|
return
|
||||||
|
case <-timer.C:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore while this peer is not a leader.
|
// Ignore while this peer is not a leader.
|
||||||
@ -802,12 +830,14 @@ func (s *PeerServer) monitorActiveSize(closeChan chan bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// monitorPeerActivity has the leader periodically for dead nodes and demotes them.
|
// monitorPeerActivity has the leader periodically for dead nodes and demotes them.
|
||||||
func (s *PeerServer) monitorPeerActivity(closeChan chan bool) {
|
func (s *PeerServer) monitorPeerActivity() {
|
||||||
for {
|
for {
|
||||||
|
timer := time.NewTimer(PeerActivityMonitorTimeout)
|
||||||
|
defer timer.Stop()
|
||||||
select {
|
select {
|
||||||
case <-time.After(PeerActivityMonitorTimeout):
|
case <-s.closeChan:
|
||||||
case <-closeChan:
|
|
||||||
return
|
return
|
||||||
|
case <-timer.C:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore while this peer is not a leader.
|
// Ignore while this peer is not a leader.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user