mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
@@ -37,11 +37,11 @@ func (c *JoinCommand) CommandName() string {
|
||||
}
|
||||
|
||||
// Join a server to the cluster
|
||||
func (c *JoinCommand) Apply(server raft.Server) (interface{}, error) {
|
||||
ps, _ := server.Context().(*PeerServer)
|
||||
func (c *JoinCommand) Apply(context raft.Context) (interface{}, error) {
|
||||
ps, _ := context.Server().Context().(*PeerServer)
|
||||
|
||||
b := make([]byte, 8)
|
||||
binary.PutUvarint(b, server.CommitIndex())
|
||||
binary.PutUvarint(b, context.CommitIndex())
|
||||
|
||||
// Make sure we're not getting a cached value from the registry.
|
||||
ps.registry.Invalidate(c.Name)
|
||||
@@ -54,14 +54,14 @@ func (c *JoinCommand) Apply(server raft.Server) (interface{}, error) {
|
||||
// Check peer number in the cluster
|
||||
if ps.registry.Count() == ps.MaxClusterSize {
|
||||
log.Debug("Reject join request from ", c.Name)
|
||||
return []byte{0}, etcdErr.NewError(etcdErr.EcodeNoMorePeer, "", server.CommitIndex())
|
||||
return []byte{0}, etcdErr.NewError(etcdErr.EcodeNoMorePeer, "", context.CommitIndex())
|
||||
}
|
||||
|
||||
// Add to shared peer registry.
|
||||
ps.registry.Register(c.Name, c.RaftURL, c.EtcdURL)
|
||||
|
||||
// Add peer in raft
|
||||
err := server.AddPeer(c.Name, "")
|
||||
err := context.Server().AddPeer(c.Name, "")
|
||||
|
||||
// Add peer stats
|
||||
if c.Name != ps.RaftServer().Name() {
|
||||
|
||||
@@ -22,6 +22,8 @@ import (
|
||||
|
||||
const retryInterval = 10
|
||||
|
||||
// ThresholdMonitorTimeout is how long monitorTimeoutThreshold pauses after
// reporting an election timeout threshold event before it reports another.
const ThresholdMonitorTimeout = 5 * time.Second
|
||||
|
||||
type PeerServer struct {
|
||||
raftServer raft.Server
|
||||
server *Server
|
||||
@@ -42,6 +44,9 @@ type PeerServer struct {
|
||||
RetryTimes int
|
||||
HeartbeatTimeout time.Duration
|
||||
ElectionTimeout time.Duration
|
||||
|
||||
closeChan chan bool
|
||||
timeoutThresholdChan chan interface{}
|
||||
}
|
||||
|
||||
// TODO: find a good policy to do snapshot
|
||||
@@ -83,6 +88,8 @@ func NewPeerServer(name string, path string, url string, bindAddr string, tlsCon
|
||||
},
|
||||
HeartbeatTimeout: defaultHeartbeatTimeout,
|
||||
ElectionTimeout: defaultElectionTimeout,
|
||||
|
||||
timeoutThresholdChan: make(chan interface{}, 1),
|
||||
}
|
||||
|
||||
// Create transporter for raft
|
||||
@@ -95,6 +102,13 @@ func NewPeerServer(name string, path string, url string, bindAddr string, tlsCon
|
||||
}
|
||||
|
||||
s.raftServer = raftServer
|
||||
s.raftServer.AddEventListener(raft.StateChangeEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.LeaderChangeEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.TermChangeEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.AddPeerEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.RemovePeerEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.HeartbeatTimeoutEventType, s.raftEventLogger)
|
||||
s.raftServer.AddEventListener(raft.ElectionTimeoutThresholdEventType, s.raftEventLogger)
|
||||
|
||||
return s
|
||||
}
|
||||
@@ -143,7 +157,10 @@ func (s *PeerServer) ListenAndServe(snapshot bool, cluster []string) error {
|
||||
log.Debugf("%s restart as a follower", s.name)
|
||||
}
|
||||
|
||||
s.closeChan = make(chan bool)
|
||||
|
||||
go s.monitorSync()
|
||||
go s.monitorTimeoutThreshold(s.closeChan)
|
||||
|
||||
// open the snapshot
|
||||
if snapshot {
|
||||
@@ -201,6 +218,10 @@ func (s *PeerServer) listenAndServeTLS(certFile, keyFile string) error {
|
||||
|
||||
// Stops the server.
|
||||
func (s *PeerServer) Close() {
|
||||
if s.closeChan != nil {
|
||||
close(s.closeChan)
|
||||
s.closeChan = nil
|
||||
}
|
||||
if s.listener != nil {
|
||||
s.listener.Close()
|
||||
s.listener = nil
|
||||
@@ -429,6 +450,43 @@ func (s *PeerServer) PeerStats() []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
// raftEventLogger converts events from the Raft server into log messages.
|
||||
func (s *PeerServer) raftEventLogger(event raft.Event) {
|
||||
value := event.Value()
|
||||
prevValue := event.PrevValue()
|
||||
if value == nil {
|
||||
value = "<nil>"
|
||||
}
|
||||
if prevValue == nil {
|
||||
prevValue = "<nil>"
|
||||
}
|
||||
|
||||
switch event.Type() {
|
||||
case raft.StateChangeEventType:
|
||||
log.Infof("%s: state changed from '%v' to '%v'.", s.name, prevValue, value)
|
||||
case raft.TermChangeEventType:
|
||||
log.Infof("%s: term #%v started.", s.name, value)
|
||||
case raft.LeaderChangeEventType:
|
||||
log.Infof("%s: leader changed from '%v' to '%v'.", s.name, prevValue, value)
|
||||
case raft.AddPeerEventType:
|
||||
log.Infof("%s: peer added: '%v'", s.name, value)
|
||||
case raft.RemovePeerEventType:
|
||||
log.Infof("%s: peer removed: '%v'", s.name, value)
|
||||
case raft.HeartbeatTimeoutEventType:
|
||||
var name = "<unknown>"
|
||||
if peer, ok := value.(*raft.Peer); ok {
|
||||
name = peer.Name
|
||||
}
|
||||
log.Infof("%s: warning: heartbeat timed out: '%v'", s.name, name)
|
||||
case raft.ElectionTimeoutThresholdEventType:
|
||||
select {
|
||||
case s.timeoutThresholdChan <- value:
|
||||
default:
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (s *PeerServer) monitorSnapshot() {
|
||||
for {
|
||||
time.Sleep(s.snapConf.checkingInterval)
|
||||
@@ -451,3 +509,18 @@ func (s *PeerServer) monitorSync() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// monitorTimeoutThreshold groups timeout threshold events together and prints
|
||||
// them as a single log line.
|
||||
func (s *PeerServer) monitorTimeoutThreshold(closeChan chan bool) {
|
||||
for {
|
||||
select {
|
||||
case value := <-s.timeoutThresholdChan:
|
||||
log.Infof("%s: warning: heartbeat near election timeout: %v", s.name, value)
|
||||
case <-closeChan:
|
||||
return
|
||||
}
|
||||
|
||||
time.Sleep(ThresholdMonitorTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ func (c *RemoveCommand) CommandName() string {
|
||||
}
|
||||
|
||||
// Remove a server from the cluster
|
||||
func (c *RemoveCommand) Apply(server raft.Server) (interface{}, error) {
|
||||
ps, _ := server.Context().(*PeerServer)
|
||||
func (c *RemoveCommand) Apply(context raft.Context) (interface{}, error) {
|
||||
ps, _ := context.Server().Context().(*PeerServer)
|
||||
|
||||
// Remove node from the shared registry.
|
||||
err := ps.registry.Unregister(c.Name)
|
||||
@@ -38,21 +38,21 @@ func (c *RemoveCommand) Apply(server raft.Server) (interface{}, error) {
|
||||
}
|
||||
|
||||
// Remove peer in raft
|
||||
err = server.RemovePeer(c.Name)
|
||||
err = context.Server().RemovePeer(c.Name)
|
||||
if err != nil {
|
||||
log.Debugf("Unable to remove peer: %s (%v)", c.Name, err)
|
||||
return []byte{0}, err
|
||||
}
|
||||
|
||||
if c.Name == server.Name() {
|
||||
if c.Name == context.Server().Name() {
|
||||
// the removed node is this node
|
||||
|
||||
// if the node is not replaying the previous logs
|
||||
// and the node has sent out a join request in this
|
||||
// start. It is sure that this node received a new remove
|
||||
// command and need to be removed
|
||||
if server.CommitIndex() > ps.joinIndex && ps.joinIndex != 0 {
|
||||
log.Debugf("server [%s] is removed", server.Name())
|
||||
if context.CommitIndex() > ps.joinIndex && ps.joinIndex != 0 {
|
||||
log.Debugf("server [%s] is removed", context.Server().Name())
|
||||
os.Exit(0)
|
||||
} else {
|
||||
// else ignore remove
|
||||
@@ -61,7 +61,7 @@ func (c *RemoveCommand) Apply(server raft.Server) (interface{}, error) {
|
||||
}
|
||||
|
||||
b := make([]byte, 8)
|
||||
binary.PutUvarint(b, server.CommitIndex())
|
||||
binary.PutUvarint(b, context.CommitIndex())
|
||||
|
||||
return b, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user