From 6f850a65a115f663bf0cd0499d16ff6e0131dfde Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 4 Mar 2020 01:20:25 +0800 Subject: [PATCH] raft: cleanup read index code (#11528) Signed-off-by: qupeng --- raft/raft.go | 85 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/raft/raft.go b/raft/raft.go index d63ee7f03..185178c0a 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -1078,39 +1078,34 @@ func stepLeader(r *raft, m pb.Message) error { r.bcastAppend() return nil case pb.MsgReadIndex: - // If more than the local vote is needed, go through a full broadcast, - // otherwise optimize. - if !r.prs.IsSingleton() { - if r.raftLog.zeroTermOnErrCompacted(r.raftLog.term(r.raftLog.committed)) != r.Term { - // Reject read only request when this leader has not committed any log entry at its term. - return nil - } - - // thinking: use an interally defined context instead of the user given context. - // We can express this in terms of the term and index instead of a user-supplied value. - // This would allow multiple reads to piggyback on the same message. - switch r.readOnly.option { - case ReadOnlySafe: - r.readOnly.addRequest(r.raftLog.committed, m) - // The local node automatically acks the request. - r.readOnly.recvAck(r.id, m.Entries[0].Data) - r.bcastHeartbeatWithCtx(m.Entries[0].Data) - case ReadOnlyLeaseBased: - ri := r.raftLog.committed - if m.From == None || m.From == r.id { // from local member - r.readStates = append(r.readStates, ReadState{Index: ri, RequestCtx: m.Entries[0].Data}) - } else { - r.send(pb.Message{To: m.From, Type: pb.MsgReadIndexResp, Index: ri, Entries: m.Entries}) - } - } - } else { // only one voting member (the leader) in the cluster - if m.From == None || m.From == r.id { // from leader itself - r.readStates = append(r.readStates, ReadState{Index: r.raftLog.committed, RequestCtx: m.Entries[0].Data}) - } else { // from learner member - r.send(pb.Message{To: m.From, Type: pb.MsgReadIndexResp, Index: r.raftLog.committed, Entries: m.Entries}) + // only one voting member (the leader) in the cluster + if r.prs.IsSingleton() { + if resp := r.responseToReadIndexReq(m, r.raftLog.committed); resp.To != None { + r.send(resp) } + return nil } + // Reject read only request when this leader has not committed any log entry at its term. + if !r.committedEntryInCurrentTerm() { + return nil + } + + // thinking: use an interally defined context instead of the user given context. + // We can express this in terms of the term and index instead of a user-supplied value. + // This would allow multiple reads to piggyback on the same message. + switch r.readOnly.option { + // If more than the local vote is needed, go through a full broadcast. + case ReadOnlySafe: + r.readOnly.addRequest(r.raftLog.committed, m) + // The local node automatically acks the request. + r.readOnly.recvAck(r.id, m.Entries[0].Data) + r.bcastHeartbeatWithCtx(m.Entries[0].Data) + case ReadOnlyLeaseBased: + if resp := r.responseToReadIndexReq(m, r.raftLog.committed); resp.To != None { + r.send(resp) + } + } return nil } @@ -1200,11 +1195,8 @@ func stepLeader(r *raft, m pb.Message) error { rss := r.readOnly.advance(m) for _, rs := range rss { - req := rs.req - if req.From == None || req.From == r.id { // from local member - r.readStates = append(r.readStates, ReadState{Index: rs.index, RequestCtx: req.Entries[0].Data}) - } else { - r.send(pb.Message{To: req.From, Type: pb.MsgReadIndexResp, Index: rs.index, Entries: req.Entries}) + if resp := r.responseToReadIndexReq(rs.req, rs.index); resp.To != None { + r.send(resp) } } case pb.MsgSnapStatus: @@ -1601,6 +1593,29 @@ func (r *raft) abortLeaderTransfer() { r.leadTransferee = None } +// committedEntryInCurrentTerm return true if the peer has committed an entry in its term. +func (r *raft) committedEntryInCurrentTerm() bool { + return r.raftLog.zeroTermOnErrCompacted(r.raftLog.term(r.raftLog.committed)) == r.Term +} + +// responseToReadIndexReq constructs a response for `req`. If `req` comes from the peer +// itself, a blank value will be returned. +func (r *raft) responseToReadIndexReq(req pb.Message, readIndex uint64) pb.Message { + if req.From == None || req.From == r.id { + r.readStates = append(r.readStates, ReadState{ + Index: readIndex, + RequestCtx: req.Entries[0].Data, + }) + return pb.Message{} + } + return pb.Message{ + Type: pb.MsgReadIndexResp, + To: req.From, + Index: readIndex, + Entries: req.Entries, + } +} + // increaseUncommittedSize computes the size of the proposed entries and // determines whether they would push leader over its maxUncommittedSize limit. // If the new entries would exceed the limit, the method returns false. If not,