clientv3: retry on switchRemoteAndStream

If switchRemoteAndStream fails, the whole lease API fails since
the internal routine exits. We should only fail the whole API when
there is a fatal error. For example, we should fail if we cannot
connect to any of the endpoints the user provided.

If we connect to an endpoint but fail to create a stream, we should
retry instead of returning an error that fails the entire API.
This commit is contained in:
Xiang Li 2016-04-23 21:55:31 -07:00
parent e51e146a19
commit 844208d7dd

View File

@ -388,32 +388,38 @@ func (l *lessor) getKeepAliveStream() pb.Lease_LeaseKeepAliveClient {
}
// switchRemoteAndStream rebuilds the lease client on l.conn and opens a new
// stream through l.newStream. When prevErr is non-nil, the current connection
// is closed and a replacement is requested from l.c.retryConnection first; an
// error from retryConnection is returned to the caller.
//
// NOTE(review): this listing appears to be a diff whose +/- markers were
// stripped, so the earlier single-pass body and the later retry-loop body are
// interleaved below — confirm against the actual file before editing.
func (l *lessor) switchRemoteAndStream(prevErr error) error {
l.mu.Lock()
conn := l.conn
l.mu.Unlock()
// Loop variant: keep trying until newStream succeeds or retryConnection fails.
for {
// Snapshot the connection under the lock; it is used after the lock is released.
l.mu.Lock()
conn := l.conn
l.mu.Unlock()
var (
err error
newConn *grpc.ClientConn
)
var (
err error
newConn *grpc.ClientConn
)
if prevErr != nil {
conn.Close()
newConn, err = l.c.retryConnection(conn, prevErr)
if err != nil {
return err
// Only reconnect when a previous error was reported; otherwise reuse l.conn.
if prevErr != nil {
conn.Close()
newConn, err = l.c.retryConnection(conn, prevErr)
if err != nil {
// Failing to obtain a replacement connection is the only error exit of the loop.
return err
}
}
// Install the replacement connection (if any) and rebuild the lease client on it.
l.mu.Lock()
if newConn != nil {
l.conn = newConn
}
l.remote = pb.NewLeaseClient(l.conn)
l.mu.Unlock()
// A stream-creation error becomes prevErr, so the next iteration takes the
// reconnect path above instead of returning the error.
prevErr = l.newStream()
if prevErr != nil {
continue
}
return nil
}
l.mu.Lock()
if newConn != nil {
l.conn = newConn
}
l.remote = pb.NewLeaseClient(l.conn)
l.mu.Unlock()
// Single-pass variant: the result of newStream is returned directly, with no retry.
return l.newStream()
}
func (l *lessor) newStream() error {