From 8687dd38020ec86085d6df167eca3eb79648fbab Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Tue, 4 Feb 2014 08:34:27 -0800 Subject: [PATCH] feat(discovery): fully working discovery now --- Documentation/discovery-protocol.md | 10 +++---- config/config.go | 46 ++++++++++++++++++++--------- discovery/discovery.go | 30 +++++++++++-------- scripts/test-cluster | 13 ++++++-- server/peer_server_handlers.go | 6 ++-- server/usage.go | 2 +- 6 files changed, 70 insertions(+), 37 deletions(-) diff --git a/Documentation/discovery-protocol.md b/Documentation/discovery-protocol.md index 2d8a2c734..24253455e 100644 --- a/Documentation/discovery-protocol.md +++ b/Documentation/discovery-protocol.md @@ -1,14 +1,14 @@ # Discovery Protocol -Starting an etcd cluster initially can be painful since each machine needs to know of at least one live machine in the cluster. If you are trying to bring up a cluster all at once, say using an AWS cloud formation, you also need to coordinate who will be the initial cluster leader. The discovery protocol helps you by providing a way to discover the peers in a new etcd cluster using another already running etcd cluster. +Starting a new etcd cluster can be painful since each machine needs to know of at least one live machine in the cluster. If you are trying to bring up a new cluster all at once, say using an AWS cloud formation, you also need to coordinate who will be the initial cluster leader. The discovery protocol uses an existing running etcd cluster to start a second etcd cluster. -To use this protocol you add the command line flag `-discovery` to your etcd args. In this example we will use `http://example.com/v2/keys/_etcd/registry` as the URL prefix. +To use this feature you add the command line flag `-discovery` to your etcd args. In this example we will use `http://example.com/v2/keys/_etcd/registry` as the URL prefix. ## The Protocol By convention the etcd discovery protocol uses the key prefix `_etcd/registry`. A full URL to the keyspace will be `http://example.com/v2/keys/_etcd/registry`. -## Creating a New Cluster +### Creating a New Cluster Generate a unique token that will identify the new cluster and create a key called "_state". If you get a `201 Created` back then your key is unused and you can proceed with cluster creation. If the return value is `412 Precondition Failed` then you will need to create a new token. @@ -17,7 +17,7 @@ UUID=$(uuidgen) curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/_state?prevExist=false" -d value=init ``` -## Bringing up Machines +### Bringing up Machines Now that you have your cluster ID you can start bringing up machines. Every machine will follow this protocol internally in etcd if given a `-discovery`. @@ -29,7 +29,7 @@ The first thing etcd must do is register your machine. This is done by using the curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/${etcd_machine_name}?ttl=604800" -d value=${peer_addr} ``` -### Figuring out your Peers +### Discovering Peers Now that this etcd machine is registered it must discover its peers. diff --git a/config/config.go b/config/config.go index ee066b3a2..831d7ef86 100644 --- a/config/config.go +++ b/config/config.go @@ -145,21 +145,9 @@ func (c *Config) Load(arguments []string) error { // Attempt cluster discovery if c.Discovery != "" { - p, err := discovery.Do(c.Discovery, c.Name, c.Peer.Addr) - if err != nil { - log.Fatalf("Bootstrapping encountered an unexpected error: %v", err) + if err := c.handleDiscovery(); err != nil { + return err } - - for i := range p { - // Strip the scheme off of the peer if it has one - // TODO(bp): clean this up! - purl, err := url.Parse(p[i]) - if err == nil { - p[i] = purl.Host - } - } - - c.Peers = p } // Force remove server configuration if specified. @@ -226,6 +214,36 @@ func (c *Config) loadEnv(target interface{}) error { return nil } +func (c *Config) handleDiscovery() error { + p, err := discovery.Do(c.Discovery, c.Name, c.Peer.Addr) + + // This is fatal, discovery encountered an unexpected error + // and we have no peer list. + if err != nil && len(c.Peers) == 0 { + log.Fatalf("Discovery failed and a backup peer list wasn't provided: %v", err) + return err + } + + // Warn about errors coming from discovery, this isn't fatal + // since the user might have provided a peer list elsewhere. + if err != nil { + log.Warnf("Discovery encountered an error but a backup peer list (%v) was provided: %v", c.Peers, err) + } + + for i := range p { + // Strip the scheme off of the peer if it has one + // TODO(bp): clean this up! + purl, err := url.Parse(p[i]) + if err == nil { + p[i] = purl.Host + } + } + + c.Peers = p + + return nil +} + // Loads configuration from command line flags. func (c *Config) LoadFlags(arguments []string) error { var peers, cors, path string diff --git a/discovery/discovery.go b/discovery/discovery.go index ea3b56812..f87af2366 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -9,7 +9,7 @@ import ( "time" "github.com/coreos/etcd/log" - "github.com/coreos/go-etcd/etcd" + "github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd" ) const ( @@ -44,14 +44,21 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers [] return } - // prefix is appended to all keys + // prefix is prepended to all keys for this discovery d.prefix = strings.TrimPrefix(u.Path, "/v2/keys/") - // Connect to a scheme://host not a full URL with path + // keep the old path in case we need to set the KeyPrefix below + oldPath := u.Path u.Path = "" - log.Infof("Bootstrapping via %s using prefix %s.", u.String(), d.prefix) + + // Connect to a scheme://host not a full URL with path + log.Infof("Discovery via %s using prefix %s.", u.String(), d.prefix) d.client = etcd.NewClient([]string{u.String()}) + if !strings.HasPrefix(oldPath, "/v2/keys") { + d.client.SetKeyPrefix("") + } + // Register this machine first and announce that we are a member of // this cluster err = d.heartbeat() @@ -68,7 +75,7 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers [] // Bail out on unexpected errors if err != nil { - if etcdErr, ok := err.(etcd.EtcdError); !ok || etcdErr.ErrorCode != 101 { + if etcdErr, ok := err.(*etcd.EtcdError); !ok || etcdErr.ErrorCode != 101 { return nil, err } } @@ -76,11 +83,11 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers [] // If we got a response then the CAS was successful, we are leader if resp != nil && resp.Node.Value == startedState { // We are the leader, we have no peers - log.Infof("Bootstrapping was in 'init' state this machine is the initial leader.") + log.Infof("Discovery was in the 'init' state this machine is the initial leader.") return nil, nil } - // Fall through to finding the other discoveryped peers + // Fall through to finding the other discovery peers return d.findPeers() } @@ -93,7 +100,7 @@ func (d *Discoverer) findPeers() (peers []string, err error) { node := resp.Node if node == nil { - return nil, errors.New(fmt.Sprintf("%s key doesn't exist.", d.prefix)) + return nil, fmt.Errorf("%s key doesn't exist.", d.prefix) } for _, n := range node.Nodes { @@ -105,10 +112,10 @@ func (d *Discoverer) findPeers() (peers []string, err error) { } if len(peers) == 0 { - return nil, errors.New("No peers found.") + return nil, errors.New("Discovery found an initialized cluster but no peers are registered.") } - log.Infof("Bootstrap found peers %v", peers) + log.Infof("Discovery found peers %v", peers) return } @@ -122,7 +129,7 @@ func (d *Discoverer) startHeartbeat() { case <-ticker: err := d.heartbeat() if err != nil { - log.Warnf("Bootstrapping heartbeat failed: %v", err) + log.Warnf("Discovery heartbeat failed: %v", err) } } } @@ -130,7 +137,6 @@ func (d *Discoverer) startHeartbeat() { func (d *Discoverer) heartbeat() error { _, err := d.client.Set(path.Join(d.prefix, d.name), d.peer, defaultTTL) - return err } diff --git a/scripts/test-cluster b/scripts/test-cluster index af6022534..29510a6bc 100755 --- a/scripts/test-cluster +++ b/scripts/test-cluster @@ -6,16 +6,25 @@ ulimit -n unlimited tmux new-session -d -s $SESSION +peer_args= +if [ -n "${DISCOVERY_URL}" ]; then + peer_args="-discovery ${DISCOVERY_URL}" +fi + # Setup a window for tailing log files tmux new-window -t $SESSION:1 -n 'peers' tmux split-window -h tmux select-pane -t 0 -tmux send-keys "${DIR}/../bin/etcd -peer-addr 127.0.0.1:7001 -addr 127.0.0.1:4001 -data-dir peer1 -name peer1" C-m +tmux send-keys "${DIR}/../bin/etcd -peer-addr 127.0.0.1:7001 -addr 127.0.0.1:4001 -data-dir peer1 -name peer1 ${peer_args}" C-m + +if [ -n "${peer_args}" ]; then + peer_args="-peers 127.0.0.1:7001" +fi for i in 2 3; do tmux select-pane -t 0 tmux split-window -v - tmux send-keys "${DIR}/../bin/etcd -cors='*' -peer-addr 127.0.0.1:700${i} -addr 127.0.0.1:400${i} -peers 127.0.0.1:7001 -data-dir peer${i} -name peer${i}" C-m + tmux send-keys "${DIR}/../bin/etcd -cors='*' -peer-addr 127.0.0.1:700${i} -addr 127.0.0.1:400${i} -data-dir peer${i} -name peer${i} ${peer_args}" C-m done # Attach to session diff --git a/server/peer_server_handlers.go b/server/peer_server_handlers.go index f97c2470f..a32adb3cd 100644 --- a/server/peer_server_handlers.go +++ b/server/peer_server_handlers.go @@ -6,13 +6,13 @@ import ( "strconv" "time" + "github.com/coreos/etcd/third_party/github.com/coreos/raft" + "github.com/coreos/etcd/third_party/github.com/gorilla/mux" + etcdErr "github.com/coreos/etcd/error" uhttp "github.com/coreos/etcd/pkg/http" "github.com/coreos/etcd/log" "github.com/coreos/etcd/store" - - "github.com/coreos/etcd/third_party/github.com/coreos/raft" - "github.com/coreos/etcd/third_party/github.com/gorilla/mux" ) // Get all the current logs diff --git a/server/usage.go b/server/usage.go index 4d9340515..4e47512c5 100644 --- a/server/usage.go +++ b/server/usage.go @@ -26,7 +26,7 @@ Options: -vv Enabled very verbose logging. Cluster Configuration Options: - -bootstrap-url= URL to use for bootstrapping the peer list. + -discovery= Discovery service used to find a peer list. -peers-file= Path to a file containing the peer list. -peers=, Comma-separated list of peers. The members should match the peer's '-peer-addr' flag.