From 844208d7dde07a157597a8eafc831bb2374c4b37 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Sat, 23 Apr 2016 21:55:31 -0700 Subject: [PATCH] clientv3: retry on switchRemoteAndStream If switchRemoteAndStream fails, the whole lease API fails since the internal routine exits. We should only fail the whole API when there is a fatal error. For example, we should fail if we cannot connect to any of the endpoints the user provided. If we connect to an endpoint, but fail to create a stream, we should retry instead of returning an error that fails the entire API. --- clientv3/lease.go | 50 ++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/clientv3/lease.go b/clientv3/lease.go index 1856982fe..4cfafc49e 100644 --- a/clientv3/lease.go +++ b/clientv3/lease.go @@ -388,32 +388,38 @@ func (l *lessor) getKeepAliveStream() pb.Lease_LeaseKeepAliveClient { } func (l *lessor) switchRemoteAndStream(prevErr error) error { - l.mu.Lock() - conn := l.conn - l.mu.Unlock() + for { + l.mu.Lock() + conn := l.conn + l.mu.Unlock() - var ( - err error - newConn *grpc.ClientConn - ) + var ( + err error + newConn *grpc.ClientConn + ) - if prevErr != nil { - conn.Close() - newConn, err = l.c.retryConnection(conn, prevErr) - if err != nil { - return err + if prevErr != nil { + conn.Close() + newConn, err = l.c.retryConnection(conn, prevErr) + if err != nil { + return err + } } + + l.mu.Lock() + if newConn != nil { + l.conn = newConn + } + + l.remote = pb.NewLeaseClient(l.conn) + l.mu.Unlock() + + prevErr = l.newStream() + if prevErr != nil { + continue + } + return nil } - - l.mu.Lock() - if newConn != nil { - l.conn = newConn - } - - l.remote = pb.NewLeaseClient(l.conn) - l.mu.Unlock() - - return l.newStream() } func (l *lessor) newStream() error {