Merge pull request #534 from philips/discovery-protocol-fix

fix(discovery): use prevExist instead of prevValue=init
This commit is contained in:
Brandon Philips 2014-02-05 15:15:08 -08:00
commit 39518b463a
3 changed files with 16 additions and 16 deletions

View File

@ -10,11 +10,10 @@ By convention the etcd discovery protocol uses the key prefix `_etcd/registry`.
### Creating a New Cluster
Generate a unique token that will identify the new cluster and create a key called "_state". If you get a `201 Created` back then your key is unused and you can proceed with cluster creation. If the return value is `412 Precondition Failed` then you will need to create a new token.
Generate a unique token that will identify the new cluster. This will be used as a key prefix in the following steps. An easy way to do this is to use uuidgen:
```
UUID=$(uuidgen)
curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/_state?prevExist=false" -d value=init
```
### Bringing up Machines
@ -33,10 +32,10 @@ curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/${etcd_machine_na
Now that this etcd machine is registered it must discover its peers.
But, the tricky bit of starting a new cluster is that one machine needs to assume the initial role of leader and will have no peers. To figure out if another machine has already started the cluster etcd needs to update the `_state` key from "init" to "started":
But, the tricky bit of starting a new cluster is that one machine needs to assume the initial role of leader and will have no peers. To figure out if another machine has already started the cluster etcd needs to create the `_state` key and set its value to "started":
```
curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/_state?prevValue=init" -d value=started
curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/_state?prevExist=false" -d value=started
```
If this returns a `200 OK` response then this machine is the initial leader and should start with no peers configured. If, however, this returns a `412 Precondition Failed` then you need to find all of the registered peers:

View File

@ -8,13 +8,13 @@ import (
"strings"
"time"
etcdErr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/log"
"github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
)
const (
stateKey = "_state"
initState = "init"
startedState = "started"
defaultTTL = 604800 // One week TTL
)
@ -71,11 +71,11 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
go d.startHeartbeat()
// Attempt to take the leadership role, if there is no error we are it!
resp, err := d.client.CompareAndSwap(path.Join(d.prefix, stateKey), startedState, 0, initState, 0)
resp, err := d.client.Create(path.Join(d.prefix, stateKey), startedState, 0)
// Bail out on unexpected errors
if err != nil {
if etcdErr, ok := err.(*etcd.EtcdError); !ok || etcdErr.ErrorCode != 101 {
if clientErr, ok := err.(*etcd.EtcdError); !ok || clientErr.ErrorCode != etcdErr.EcodeNodeExist {
return nil, err
}
}
@ -83,7 +83,7 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
// If we got a response then the CAS was successful, we are leader
if resp != nil && resp.Node.Value == startedState {
// We are the leader, we have no peers
log.Infof("Discovery was in the 'init' state this machine is the initial leader.")
log.Infof("Discovery _state was empty, so this machine is the initial leader.")
return nil, nil
}

View File

@ -93,11 +93,6 @@ func TestDiscoveryDownWithBackupPeers(t *testing.T) {
// registers as the first peer.
func TestDiscoveryFirstPeer(t *testing.T) {
etcdtest.RunServer(func(s *server.Server) {
v := url.Values{}
v.Set("value", "init")
resp, err := etcdtest.PutForm(fmt.Sprintf("%s%s", s.URL(), "/v2/keys/_etcd/registry/2/_state"), v)
assert.Equal(t, resp.StatusCode, http.StatusCreated)
proc, err := startServer([]string{"-discovery", s.URL() + "/v2/keys/_etcd/registry/2"})
if err != nil {
t.Fatal(err.Error())
@ -211,10 +206,16 @@ func TestDiscoverySecondPeerUp(t *testing.T) {
}
// TODO(bp): need to have a better way of knowing a machine is up
time.Sleep(1 * time.Second)
for i := 0; i < 10; i++ {
time.Sleep(1 * time.Second)
etcdc := goetcd.NewClient(nil)
_, err = etcdc.Set("foobar", "baz", 0)
if err == nil {
break
}
}
etcdc := goetcd.NewClient(nil)
_, err = etcdc.Set("foobar", "baz", 0)
if err != nil {
t.Fatal(err.Error())
}