etcd/server/peer_server.go
Yicheng Qin 3a4df1612c feat(discovery): adjust boot order to find peers
The boot order for peers is -discovery, -peers, log data, forming
new cluster itself.

Special rules:
1. If discovery succeeds, it would find peers specified by discover URL
only.
2. Etcd would fail when meeting bad -discovery, no -peers and log data.

Add TestDiscoveryDownNoBackupPeersWithDataDir as the test.
2014-02-17 12:53:39 -08:00

535 lines
14 KiB
Go

package server
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"strconv"
"time"
"github.com/coreos/etcd/third_party/github.com/coreos/raft"
"github.com/coreos/etcd/third_party/github.com/gorilla/mux"
"github.com/coreos/etcd/discovery"
etcdErr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/log"
"github.com/coreos/etcd/metrics"
"github.com/coreos/etcd/store"
)
const ThresholdMonitorTimeout = 5 * time.Second
type PeerServerConfig struct {
Name string
Scheme string
URL string
SnapshotCount int
MaxClusterSize int
RetryTimes int
RetryInterval float64
}
type PeerServer struct {
Config PeerServerConfig
raftServer raft.Server
server *Server
joinIndex uint64
followersStats *raftFollowersStats
serverStats *raftServerStats
registry *Registry
store store.Store
snapConf *snapshotConf
closeChan chan bool
timeoutThresholdChan chan interface{}
metrics *metrics.Bucket
}
// TODO: find a good policy to do snapshot
type snapshotConf struct {
// Etcd will check if snapshot is need every checkingInterval
checkingInterval time.Duration
// The index when the last snapshot happened
lastIndex uint64
// If the incremental number of index since the last snapshot
// exceeds the snapshot Threshold, etcd will do a snapshot
snapshotThr uint64
}
func NewPeerServer(psConfig PeerServerConfig, registry *Registry, store store.Store, mb *metrics.Bucket, followersStats *raftFollowersStats, serverStats *raftServerStats) *PeerServer {
s := &PeerServer{
Config: psConfig,
registry: registry,
store: store,
followersStats: followersStats,
serverStats: serverStats,
timeoutThresholdChan: make(chan interface{}, 1),
metrics: mb,
}
return s
}
func (s *PeerServer) SetRaftServer(raftServer raft.Server) {
s.snapConf = &snapshotConf{
checkingInterval: time.Second * 3,
// this is not accurate, we will update raft to provide an api
lastIndex: raftServer.CommitIndex(),
snapshotThr: uint64(s.Config.SnapshotCount),
}
raftServer.AddEventListener(raft.StateChangeEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.LeaderChangeEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.TermChangeEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.AddPeerEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.RemovePeerEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.HeartbeatIntervalEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.ElectionTimeoutThresholdEventType, s.raftEventLogger)
raftServer.AddEventListener(raft.HeartbeatEventType, s.recordMetricEvent)
s.raftServer = raftServer
}
// Helper function to do discovery and return results in expected format
func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) {
peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL)
// Warn about errors coming from discovery, this isn't fatal
// since the user might have provided a peer list elsewhere,
// or there is some log in data dir.
if err != nil {
log.Warnf("Discovery encountered an error: %v", err)
return
}
for i := range peers {
// Strip the scheme off of the peer if it has one
// TODO(bp): clean this up!
purl, err := url.Parse(peers[i])
if err == nil {
peers[i] = purl.Host
}
}
log.Infof("Discovery fetched back peer list: %v", peers)
return
}
// Try all possible ways to find clusters to join
// Include -discovery, -peers and log data in -data-dir
//
// Peer discovery follows this order:
// 1. -discovery
// 2. -peers
// 3. previous peers in -data-dir
func (s *PeerServer) findCluster(discoverURL string, peers []string) {
// Attempt cluster discovery
toDiscover := discoverURL != ""
if toDiscover {
discoverPeers, discoverErr := s.handleDiscovery(discoverURL)
// It is registered in discover url
if discoverErr == nil {
// start as a leader in a new cluster
if len(discoverPeers) == 0 {
log.Debug("This peer is starting a brand new cluster based on discover URL.")
s.startAsLeader()
} else {
s.startAsFollower(discoverPeers)
}
return
}
}
hasPeerList := len(peers) > 0
// if there is log in data dir, append previous peers to peers in config
// to find cluster
prevPeers := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name)
for i := 0; i < len(prevPeers); i++ {
u, err := url.Parse(prevPeers[i])
if err != nil {
log.Debug("rejoin cannot parse url: ", err)
}
prevPeers[i] = u.Host
}
peers = append(peers, prevPeers...)
// if there is backup peer lists, use it to find cluster
if len(peers) > 0 {
ok := s.joinCluster(peers)
if !ok {
log.Warn("No living peers are found!")
} else {
log.Debugf("%s restart as a follower based on peers[%v]", s.Config.Name)
return
}
}
if !s.raftServer.IsLogEmpty() {
log.Debug("Entire cluster is down! %v will restart the cluster.", s.Config.Name)
return
}
if toDiscover {
log.Fatalf("Discovery failed, no available peers in backup list, and no log data")
}
if hasPeerList {
log.Fatalf("No available peers in backup list, and no log data")
}
log.Infof("This peer is starting a brand new cluster now.")
s.startAsLeader()
}
// Start the raft server
func (s *PeerServer) Start(snapshot bool, discoverURL string, peers []string) error {
// LoadSnapshot
if snapshot {
err := s.raftServer.LoadSnapshot()
if err == nil {
log.Debugf("%s finished load snapshot", s.Config.Name)
} else {
log.Debug(err)
}
}
s.raftServer.Start()
s.findCluster(discoverURL, peers)
s.closeChan = make(chan bool)
go s.monitorSync()
go s.monitorTimeoutThreshold(s.closeChan)
// open the snapshot
if snapshot {
go s.monitorSnapshot()
}
return nil
}
func (s *PeerServer) Stop() {
if s.closeChan != nil {
close(s.closeChan)
s.closeChan = nil
}
s.raftServer.Stop()
}
func (s *PeerServer) HTTPHandler() http.Handler {
router := mux.NewRouter()
// internal commands
router.HandleFunc("/name", s.NameHttpHandler)
router.HandleFunc("/version", s.VersionHttpHandler)
router.HandleFunc("/version/{version:[0-9]+}/check", s.VersionCheckHttpHandler)
router.HandleFunc("/upgrade", s.UpgradeHttpHandler)
router.HandleFunc("/join", s.JoinHttpHandler)
router.HandleFunc("/remove/{name:.+}", s.RemoveHttpHandler)
router.HandleFunc("/vote", s.VoteHttpHandler)
router.HandleFunc("/log", s.GetLogHttpHandler)
router.HandleFunc("/log/append", s.AppendEntriesHttpHandler)
router.HandleFunc("/snapshot", s.SnapshotHttpHandler)
router.HandleFunc("/snapshotRecovery", s.SnapshotRecoveryHttpHandler)
router.HandleFunc("/etcdURL", s.EtcdURLHttpHandler)
return router
}
// Retrieves the underlying Raft server.
func (s *PeerServer) RaftServer() raft.Server {
return s.raftServer
}
// Associates the client server with the peer server.
func (s *PeerServer) SetServer(server *Server) {
s.server = server
}
func (s *PeerServer) startAsLeader() {
// leader need to join self as a peer
for {
_, err := s.raftServer.Do(NewJoinCommand(store.MinVersion(), store.MaxVersion(), s.raftServer.Name(), s.Config.URL, s.server.URL()))
if err == nil {
break
}
}
log.Debugf("%s start as a leader", s.Config.Name)
}
func (s *PeerServer) startAsFollower(cluster []string) {
// start as a follower in a existing cluster
for i := 0; i < s.Config.RetryTimes; i++ {
ok := s.joinCluster(cluster)
if ok {
return
}
log.Warnf("%v is unable to join the cluster using any of the peers %v at %dth time. Retrying in %.1f seconds", s.Config.Name, cluster, i, s.Config.RetryInterval)
time.Sleep(time.Second * time.Duration(s.Config.RetryInterval))
}
log.Fatalf("Cannot join the cluster via given peers after %x retries", s.Config.RetryTimes)
}
// getVersion fetches the peer version of a cluster.
func getVersion(t *transporter, versionURL url.URL) (int, error) {
resp, req, err := t.Get(versionURL.String())
if err != nil {
return 0, err
}
defer resp.Body.Close()
t.CancelWhenTimeout(req)
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return 0, err
}
// Parse version number.
version, _ := strconv.Atoi(string(body))
return version, nil
}
// Upgradable checks whether all peers in a cluster support an upgrade to the next store version.
func (s *PeerServer) Upgradable() error {
nextVersion := s.store.Version() + 1
for _, peerURL := range s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name) {
u, err := url.Parse(peerURL)
if err != nil {
return fmt.Errorf("PeerServer: Cannot parse URL: '%s' (%s)", peerURL, err)
}
t, _ := s.raftServer.Transporter().(*transporter)
checkURL := (&url.URL{Host: u.Host, Scheme: s.Config.Scheme, Path: fmt.Sprintf("/version/%d/check", nextVersion)}).String()
resp, _, err := t.Get(checkURL)
if err != nil {
return fmt.Errorf("PeerServer: Cannot check version compatibility: %s", u.Host)
}
if resp.StatusCode != 200 {
return fmt.Errorf("PeerServer: Version %d is not compatible with peer: %s", nextVersion, u.Host)
}
}
return nil
}
func (s *PeerServer) joinCluster(cluster []string) bool {
for _, peer := range cluster {
if len(peer) == 0 {
continue
}
err := s.joinByPeer(s.raftServer, peer, s.Config.Scheme)
if err == nil {
log.Debugf("%s joined the cluster via peer %s", s.Config.Name, peer)
return true
}
if _, ok := err.(etcdErr.Error); ok {
log.Fatal(err)
}
log.Warnf("Attempt to join via %s failed: %s", peer, err)
}
return false
}
// Send join requests to peer.
func (s *PeerServer) joinByPeer(server raft.Server, peer string, scheme string) error {
var b bytes.Buffer
// t must be ok
t, _ := server.Transporter().(*transporter)
// Our version must match the leaders version
versionURL := url.URL{Host: peer, Scheme: scheme, Path: "/version"}
version, err := getVersion(t, versionURL)
if err != nil {
return fmt.Errorf("Error during join version check: %v", err)
}
if version < store.MinVersion() || version > store.MaxVersion() {
return fmt.Errorf("Unable to join: cluster version is %d; version compatibility is %d - %d", version, store.MinVersion(), store.MaxVersion())
}
json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))
joinURL := url.URL{Host: peer, Scheme: scheme, Path: "/join"}
log.Debugf("Send Join Request to %s", joinURL.String())
resp, req, err := t.Post(joinURL.String(), &b)
for {
if err != nil {
return fmt.Errorf("Unable to join: %v", err)
}
if resp != nil {
defer resp.Body.Close()
t.CancelWhenTimeout(req)
if resp.StatusCode == http.StatusOK {
b, _ := ioutil.ReadAll(resp.Body)
s.joinIndex, _ = binary.Uvarint(b)
return nil
}
if resp.StatusCode == http.StatusTemporaryRedirect {
address := resp.Header.Get("Location")
log.Debugf("Send Join Request to %s", address)
json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))
resp, req, err = t.Post(address, &b)
} else if resp.StatusCode == http.StatusBadRequest {
log.Debug("Reach max number peers in the cluster")
decoder := json.NewDecoder(resp.Body)
err := &etcdErr.Error{}
decoder.Decode(err)
return *err
} else {
return fmt.Errorf("Unable to join")
}
}
}
}
func (s *PeerServer) Stats() []byte {
s.serverStats.LeaderInfo.Uptime = time.Now().Sub(s.serverStats.LeaderInfo.startTime).String()
// TODO: register state listener to raft to change this field
// rather than compare the state each time Stats() is called.
if s.RaftServer().State() == raft.Leader {
s.serverStats.LeaderInfo.Name = s.RaftServer().Name()
}
queue := s.serverStats.sendRateQueue
s.serverStats.SendingPkgRate, s.serverStats.SendingBandwidthRate = queue.Rate()
queue = s.serverStats.recvRateQueue
s.serverStats.RecvingPkgRate, s.serverStats.RecvingBandwidthRate = queue.Rate()
b, _ := json.Marshal(s.serverStats)
return b
}
func (s *PeerServer) PeerStats() []byte {
if s.raftServer.State() == raft.Leader {
b, _ := json.Marshal(s.followersStats)
return b
}
return nil
}
// raftEventLogger converts events from the Raft server into log messages.
func (s *PeerServer) raftEventLogger(event raft.Event) {
value := event.Value()
prevValue := event.PrevValue()
if value == nil {
value = "<nil>"
}
if prevValue == nil {
prevValue = "<nil>"
}
switch event.Type() {
case raft.StateChangeEventType:
log.Infof("%s: state changed from '%v' to '%v'.", s.Config.Name, prevValue, value)
case raft.TermChangeEventType:
log.Infof("%s: term #%v started.", s.Config.Name, value)
case raft.LeaderChangeEventType:
log.Infof("%s: leader changed from '%v' to '%v'.", s.Config.Name, prevValue, value)
case raft.AddPeerEventType:
log.Infof("%s: peer added: '%v'", s.Config.Name, value)
case raft.RemovePeerEventType:
log.Infof("%s: peer removed: '%v'", s.Config.Name, value)
case raft.HeartbeatIntervalEventType:
var name = "<unknown>"
if peer, ok := value.(*raft.Peer); ok {
name = peer.Name
}
log.Infof("%s: warning: heartbeat timed out: '%v'", s.Config.Name, name)
case raft.ElectionTimeoutThresholdEventType:
select {
case s.timeoutThresholdChan <- value:
default:
}
}
}
func (s *PeerServer) recordMetricEvent(event raft.Event) {
name := fmt.Sprintf("raft.event.%s", event.Type())
value := event.Value().(time.Duration)
(*s.metrics).Timer(name).Update(value)
}
// logSnapshot logs about the snapshot that was taken.
func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) {
info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex)
if err != nil {
log.Infof("%s attempted and failed: %v", info, err)
} else {
log.Infof("%s completed", info)
}
}
func (s *PeerServer) monitorSnapshot() {
for {
time.Sleep(s.snapConf.checkingInterval)
currentIndex := s.RaftServer().CommitIndex()
count := currentIndex - s.snapConf.lastIndex
if uint64(count) > s.snapConf.snapshotThr {
err := s.raftServer.TakeSnapshot()
s.logSnapshot(err, currentIndex, count)
s.snapConf.lastIndex = currentIndex
}
}
}
func (s *PeerServer) monitorSync() {
ticker := time.Tick(time.Millisecond * 500)
for {
select {
case now := <-ticker:
if s.raftServer.State() == raft.Leader {
s.raftServer.Do(s.store.CommandFactory().CreateSyncCommand(now))
}
}
}
}
// monitorTimeoutThreshold groups timeout threshold events together and prints
// them as a single log line.
func (s *PeerServer) monitorTimeoutThreshold(closeChan chan bool) {
for {
select {
case value := <-s.timeoutThresholdChan:
log.Infof("%s: warning: heartbeat near election timeout: %v", s.Config.Name, value)
case <-closeChan:
return
}
time.Sleep(ThresholdMonitorTimeout)
}
}