clientv3: throttle reconnection rate

Client was reconnecting after establishing connections because the lease
and watch APIs were thrashing. Instead, wait a little before accepting
new reconnect requests.
This commit is contained in:
Anthony Romano 2016-05-13 13:24:30 -07:00
parent 3c3bb3f97c
commit e8101ddf09

View File

@@ -27,6 +27,7 @@ import (
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"golang.org/x/net/context" "golang.org/x/net/context"
"golang.org/x/time/rate"
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials"
"google.golang.org/grpc/metadata" "google.golang.org/grpc/metadata"
@@ -34,6 +35,9 @@ import (
var (
	// ErrNoAvailableEndpoints is returned when the client has no endpoint to dial.
	ErrNoAvailableEndpoints = errors.New("etcdclient: no available endpoints")

	// minConnRetryWait is the minimum time between reconnects to avoid flooding
	minConnRetryWait = time.Second
)
// Client provides and manages an etcd v3 client session. // Client provides and manages an etcd v3 client session.
@@ -191,7 +195,7 @@ func newClient(cfg *Config) (*Client, error) {
creds: creds, creds: creds,
ctx: ctx, ctx: ctx,
cancel: cancel, cancel: cancel,
reconnc: make(chan error), reconnc: make(chan error, 1),
newconnc: make(chan struct{}), newconnc: make(chan struct{}),
} }
@@ -248,8 +252,11 @@ func (c *Client) retryConnection(err error) (newConn *grpc.ClientConn, dialErr e
// connStartRetry schedules a reconnect if one is not already running // connStartRetry schedules a reconnect if one is not already running
func (c *Client) connStartRetry(err error) { func (c *Client) connStartRetry(err error) {
c.mu.Lock()
ch := c.reconnc
defer c.mu.Unlock()
select { select {
case c.reconnc <- err: case ch <- err:
default: default:
} }
} }
@@ -273,15 +280,20 @@ func (c *Client) connWait(ctx context.Context, err error) (*grpc.ClientConn, err
// connMonitor monitors the connection and handles retries // connMonitor monitors the connection and handles retries
func (c *Client) connMonitor() { func (c *Client) connMonitor() {
var err error var err error
for {
select { defer func() {
case err = <-c.reconnc:
case <-c.ctx.Done():
_, err = c.retryConnection(c.ctx.Err()) _, err = c.retryConnection(c.ctx.Err())
c.mu.Lock() c.mu.Lock()
c.lastConnErr = err c.lastConnErr = err
close(c.newconnc) close(c.newconnc)
c.mu.Unlock() c.mu.Unlock()
}()
limiter := rate.NewLimiter(rate.Every(minConnRetryWait), 1)
for limiter.Wait(c.ctx) == nil {
select {
case err = <-c.reconnc:
case <-c.ctx.Done():
return return
} }
conn, connErr := c.retryConnection(err) conn, connErr := c.retryConnection(err)
@@ -290,6 +302,7 @@ func (c *Client) connMonitor() {
c.conn = conn c.conn = conn
close(c.newconnc) close(c.newconnc)
c.newconnc = make(chan struct{}) c.newconnc = make(chan struct{})
c.reconnc = make(chan error, 1)
c.mu.Unlock() c.mu.Unlock()
} }
} }