Merge pull request #5464 from heyitsanthony/fix-victim-watchers
mvcc: tighten up watcher cancelation and revision handling
commit 9c767cbf98
etcdserver/api/v3rpc/watch.go:

@@ -94,9 +94,12 @@ type serverWatchStream struct {
 	// closec indicates the stream is closed.
 	closec chan struct{}
+
+	// wg waits for the send loop to complete
+	wg sync.WaitGroup
 }

-func (ws *watchServer) Watch(stream pb.Watch_WatchServer) error {
+func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
 	sws := serverWatchStream{
 		clusterID: ws.clusterID,
 		memberID:  ws.memberID,
@@ -109,23 +112,30 @@ func (ws *watchServer) Watch(stream pb.Watch_WatchServer) error {
 		closec: make(chan struct{}),
 	}

-	go sws.sendLoop()
-	errc := make(chan error, 1)
+	sws.wg.Add(1)
 	go func() {
-		errc <- sws.recvLoop()
-		sws.close()
+		sws.sendLoop()
+		sws.wg.Done()
 	}()

+	errc := make(chan error, 1)
+	// Ideally recvLoop would also use sws.wg to signal its completion
+	// but when stream.Context().Done() is closed, the stream's recv
+	// may continue to block since it uses a different context, leading to
+	// deadlock when calling sws.close().
+	go func() { errc <- sws.recvLoop() }()
+
 	select {
-	case err := <-errc:
-		return err
+	case err = <-errc:
 	case <-stream.Context().Done():
-		err := stream.Context().Err()
+		err = stream.Context().Err()
 		// the only server-side cancellation is noleader for now.
 		if err == context.Canceled {
-			return rpctypes.ErrGRPCNoLeader
+			err = rpctypes.ErrGRPCNoLeader
 		}
-		return err
 	}
+	sws.close()
+	return err
 }

 func (sws *serverWatchStream) recvLoop() error {
@@ -292,6 +302,7 @@ func (sws *serverWatchStream) close() {
 	sws.watchStream.Close()
 	close(sws.closec)
 	close(sws.ctrlStream)
+	sws.wg.Wait()
 }

 func (sws *serverWatchStream) newResponseHeader(rev int64) *pb.ResponseHeader {
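
For context, here is a minimal runnable Go sketch of the shutdown pattern these watch.go hunks introduce (the stream type, its fields, and the 50ms/10ms timings are hypothetical stand-ins, not etcd's API): the send loop is the only goroutine tracked by the WaitGroup, so close() can safely wait for it, while the recv loop reports through a buffered channel because its Recv() may keep blocking on a different context and must never be waited on.

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

type stream struct {
	resp   chan string
	closec chan struct{}
	wg     sync.WaitGroup
}

func (s *stream) sendLoop() {
	for {
		select {
		case r := <-s.resp:
			fmt.Println("sent:", r)
		case <-s.closec:
			return
		}
	}
}

func (s *stream) recvLoop() error {
	// stands in for stream.Recv(); may block arbitrarily long
	time.Sleep(50 * time.Millisecond)
	return nil
}

func (s *stream) close() {
	close(s.closec)
	s.wg.Wait() // safe: only the send loop is tracked
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()

	s := &stream{resp: make(chan string), closec: make(chan struct{})}
	s.wg.Add(1)
	go func() {
		s.sendLoop()
		s.wg.Done()
	}()

	errc := make(chan error, 1) // buffered: recvLoop may finish after we return
	go func() { errc <- s.recvLoop() }()

	var err error
	select {
	case err = <-errc:
	case <-ctx.Done():
		err = ctx.Err()
	}
	s.close()
	fmt.Println("watch done:", err)
}
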
mvcc/watchable_store.go:

@@ -190,19 +190,19 @@ func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch c
 	defer s.mu.Unlock()

 	wa := &watcher{
-		key: key,
-		end: end,
-		cur: startRev,
-		id:  id,
-		ch:  ch,
+		key:    key,
+		end:    end,
+		minRev: startRev,
+		id:     id,
+		ch:     ch,
 	}

 	s.store.mu.Lock()
 	synced := startRev > s.store.currentRev.main || startRev == 0
 	if synced {
-		wa.cur = s.store.currentRev.main + 1
-		if startRev > wa.cur {
-			wa.cur = startRev
+		wa.minRev = s.store.currentRev.main + 1
+		if startRev > wa.minRev {
+			wa.minRev = startRev
 		}
 	}
 	s.store.mu.Unlock()
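
The hunk above rewrites the revision bookkeeping in terms of minRev, the next revision the watcher is willing to accept. A toy illustration of that initialization rule (initialMinRev is a hypothetical helper, not etcd code): a watcher is "synced" when it starts at 0 or past the current revision, and its minRev then points one past whatever it is considered to have seen.

package main

import "fmt"

func initialMinRev(startRev, currentRev int64) (minRev int64, synced bool) {
	synced = startRev > currentRev || startRev == 0
	minRev = startRev
	if synced {
		minRev = currentRev + 1
		if startRev > minRev {
			minRev = startRev
		}
	}
	return minRev, synced
}

func main() {
	for _, startRev := range []int64{0, 3, 7} {
		minRev, synced := initialMinRev(startRev, 5)
		fmt.Printf("startRev=%d -> minRev=%d synced=%v\n", startRev, minRev, synced)
	}
	// startRev=0 -> minRev=6 synced=true  (watch from "now")
	// startRev=3 -> minRev=3 synced=false (must catch up from rev 3)
	// startRev=7 -> minRev=7 synced=true  (future revision)
}
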
@@ -214,30 +214,47 @@ func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch c
 	}
 	watcherGauge.Inc()

-	cancel := cancelFunc(func() {
+	return wa, func() { s.cancelWatcher(wa) }
+}
+
+// cancelWatcher removes references of the watcher from the watchableStore
+func (s *watchableStore) cancelWatcher(wa *watcher) {
+	for {
 		s.mu.Lock()
-		// remove references of the watcher
+
 		if s.unsynced.delete(wa) {
 			slowWatcherGauge.Dec()
-			watcherGauge.Dec()
+			break
 		} else if s.synced.delete(wa) {
-			watcherGauge.Dec()
-		} else {
-			for _, wb := range s.victims {
-				if wb[wa] != nil {
-					slowWatcherGauge.Dec()
-					watcherGauge.Dec()
-					delete(wb, wa)
-					break
-				}
-			}
+			break
+		} else if wa.compacted {
+			break
 		}
-		s.mu.Unlock()

-		// If we cannot find it, it should have finished watch.
-	})
+		if !wa.victim {
+			panic("watcher not victim but not in watch groups")
+		}

-	return wa, cancel
+		var victimBatch watcherBatch
+		for _, wb := range s.victims {
+			if wb[wa] != nil {
+				victimBatch = wb
+				break
+			}
+		}
+		if victimBatch != nil {
+			slowWatcherGauge.Dec()
+			delete(victimBatch, wa)
+			break
+		}
+
+		// victim being processed so not accessible; retry
+		s.mu.Unlock()
+		time.Sleep(time.Millisecond)
+	}
+
+	watcherGauge.Dec()
+	s.mu.Unlock()
 }

 // syncWatchersLoop syncs the watcher in the unsynced map every 100ms.
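
A condensed sketch of the cancelation strategy cancelWatcher uses above (registry, watcher, and inflight are hypothetical toy types): when the watcher is temporarily owned by another goroutine, the canceler releases the lock, sleeps briefly, and retries until some set claims the deletion; each break exits the loop with the lock still held, matching the single unlock at the end.

package main

import (
	"fmt"
	"sync"
	"time"
)

type watcher struct{ id int }

type registry struct {
	mu     sync.Mutex
	synced map[*watcher]bool
	// inflight: watcher removed from all sets while being processed elsewhere
	inflight map[*watcher]bool
}

func (r *registry) cancel(w *watcher) {
	for {
		r.mu.Lock()
		if r.synced[w] {
			delete(r.synced, w)
			break
		}
		if !r.inflight[w] {
			break // already fully removed
		}
		// being processed elsewhere; retry, mirroring cancelWatcher's backoff
		r.mu.Unlock()
		time.Sleep(time.Millisecond)
	}
	r.mu.Unlock()
}

func main() {
	w := &watcher{id: 1}
	r := &registry{synced: map[*watcher]bool{}, inflight: map[*watcher]bool{w: true}}

	go func() { // simulated victim processing: hand the watcher back later
		time.Sleep(5 * time.Millisecond)
		r.mu.Lock()
		delete(r.inflight, w)
		r.synced[w] = true
		r.mu.Unlock()
	}()

	r.cancel(w)
	fmt.Println("canceled; synced set size:", len(r.synced))
}
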
@@ -306,8 +323,10 @@ func (s *watchableStore) moveVictims() (moved int) {
 	for _, wb := range victims {
 		// try to send responses again
 		for w, eb := range wb {
+			// watcher has observed the store up to, but not including, w.minRev
+			rev := w.minRev - 1
 			select {
-			case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: w.cur}:
+			case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}:
 				pendingEventsGauge.Add(float64(len(eb.evs)))
 			default:
 				if newVictim == nil {
@@ -328,10 +347,11 @@ func (s *watchableStore) moveVictims() (moved int) {
 				// couldn't send watch response; stays victim
 				continue
 			}
+			w.victim = false
 			if eb.moreRev != 0 {
-				w.cur = eb.moreRev
+				w.minRev = eb.moreRev
 			}
-			if w.cur < curRev {
+			if w.minRev <= curRev {
 				s.unsynced.add(w)
 			} else {
 				slowWatcherGauge.Dec()
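
The two moveVictims hunks above retry delivery at revision minRev-1 (everything the watcher has already observed) and then requeue the recovered victim. A toy sketch of that requeue decision (requeue and the bare watcher struct are hypothetical, not etcd code): a nonzero moreRev means the last batch was truncated and the watcher must resume from there.

package main

import "fmt"

type watcher struct {
	minRev int64
	victim bool
}

// requeue decides where a recovered victim goes once its response is delivered.
func requeue(w *watcher, moreRev, curRev int64) string {
	w.victim = false
	if moreRev != 0 {
		w.minRev = moreRev
	}
	if w.minRev <= curRev {
		return "unsynced" // still behind the store; keep catching up
	}
	return "synced"
}

func main() {
	w := &watcher{minRev: 4, victim: true}
	fmt.Println(requeue(w, 0, 9)) // unsynced: revisions 4..9 still unseen
	w2 := &watcher{minRev: 10, victim: true}
	fmt.Println(requeue(w2, 0, 9)) // synced: nothing pending
}
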
@@ -385,17 +405,20 @@ func (s *watchableStore) syncWatchers() {
 	var victims watcherBatch
 	wb := newWatcherBatch(wg, evs)
 	for w := range wg.watchers {
+		w.minRev = curRev + 1
+
 		eb, ok := wb[w]
 		if !ok {
 			// bring un-notified watcher to synced
-			w.cur = curRev
 			s.synced.add(w)
 			s.unsynced.delete(w)
 			continue
 		}

-		w.cur = curRev
-		isBlocked := false
+		if eb.moreRev != 0 {
+			w.minRev = eb.moreRev
+		}
+
 		select {
 		case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: curRev}:
 			pendingEventsGauge.Add(float64(len(eb.evs)))
@@ -403,14 +426,14 @@ func (s *watchableStore) syncWatchers() {
 			if victims == nil {
 				victims = make(watcherBatch)
 			}
-			isBlocked = true
+			w.victim = true
 		}

-		if isBlocked {
+		if w.victim {
 			victims[w] = eb
 		} else {
 			if eb.moreRev != 0 {
-				w.cur = eb.moreRev
 				// stay unsynced; more to read
 				continue
 			}
 			s.synced.add(w)
@@ -458,14 +481,15 @@ func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) {
 			plog.Panicf("unexpected multiple revisions in notification")
 		}
 		select {
-		case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: s.Rev()}:
+		case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}:
 			pendingEventsGauge.Add(float64(len(eb.evs)))
 		default:
 			// move slow watcher to victims
-			w.cur = rev
+			w.minRev = rev + 1
 			if victim == nil {
 				victim = make(watcherBatch)
 			}
+			w.victim = true
 			victim[w] = eb
 			s.synced.delete(w)
 			slowWatcherGauge.Inc()
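
The notify hunk above never blocks on a watcher: a full channel sends the watcher to the victim set instead. A minimal sketch of that select/default pattern (event, watcher, and notify here are hypothetical toy types): minRev becomes rev+1 because once the parked batch is eventually delivered, the watcher will have observed everything through rev.

package main

import "fmt"

type event struct{ rev int64 }

type watcher struct {
	ch     chan []event
	minRev int64
	victim bool
}

// notify delivers events at revision rev without ever blocking: a watcher whose
// channel is full is marked victim and parked with its undelivered batch.
func notify(w *watcher, evs []event, rev int64, victims map[*watcher][]event) {
	select {
	case w.ch <- evs:
	default:
		w.minRev = rev + 1
		w.victim = true
		victims[w] = evs
	}
}

func main() {
	fast := &watcher{ch: make(chan []event, 1)}
	slow := &watcher{ch: make(chan []event)} // nothing reading: send would block

	victims := map[*watcher][]event{}
	notify(fast, []event{{rev: 7}}, 7, victims)
	notify(slow, []event{{rev: 7}}, 7, victims)

	fmt.Println("fast delivered:", len(fast.ch) == 1)
	fmt.Println("slow parked as victim:", slow.victim, "minRev:", slow.minRev)
}
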
@@ -508,12 +532,15 @@ type watcher struct {
 	// If end is set, the watcher is on a range.
 	end []byte

-	// cur is the current watcher revision of a unsynced watcher.
-	// cur will be updated for unsynced watcher while it is catching up.
-	// cur is startRev of a synced watcher.
-	// cur will not be updated for synced watcher.
-	cur int64
-	id  WatchID
+	// victim is set when ch is blocked and undergoing victim processing
+	victim bool
+
+	// compacted is set when the watcher is removed because of compaction
+	compacted bool
+
+	// minRev is the minimum revision update the watcher will accept
+	minRev int64
+	id     WatchID

 	// a chan to send out the watch response.
 	// The chan might be shared with other watchers.
mvcc/watchable_store_test.go:

@@ -193,8 +193,8 @@ func TestSyncWatchers(t *testing.T) {
 	}

 	for w := range sws {
-		if w.cur != s.Rev() {
-			t.Errorf("w.cur = %d, want %d", w.cur, s.Rev())
+		if w.minRev != s.Rev()+1 {
+			t.Errorf("w.minRev = %d, want %d", w.minRev, s.Rev()+1)
 		}
 	}
mvcc/watcher_group.go:

@@ -81,7 +81,7 @@ func newWatcherBatch(wg *watcherGroup, evs []mvccpb.Event) watcherBatch {
 	wb := make(watcherBatch)
 	for _, ev := range evs {
 		for w := range wg.watcherSetByKey(string(ev.Kv.Key)) {
-			if ev.Kv.ModRevision >= w.cur {
+			if ev.Kv.ModRevision >= w.minRev {
 				// don't double notify
 				wb.add(w, ev)
 			}
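
The dedup filter above reduces to a single comparison; a trivial sketch (shouldNotify is a hypothetical helper): an event is delivered only when its ModRevision is at least the watcher's minRev, so revisions already observed are never re-sent.

package main

import "fmt"

func shouldNotify(modRev, minRev int64) bool { return modRev >= minRev }

func main() {
	// a watcher with minRev=5 has already seen everything below revision 5
	fmt.Println(shouldNotify(4, 5)) // false: would be a double notify
	fmt.Println(shouldNotify(5, 5)) // true
}
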
@@ -233,20 +233,21 @@ func (wg *watcherGroup) choose(maxWatchers int, curRev, compactRev int64) (*watc
 func (wg *watcherGroup) chooseAll(curRev, compactRev int64) int64 {
 	minRev := int64(math.MaxInt64)
 	for w := range wg.watchers {
-		if w.cur > curRev {
+		if w.minRev > curRev {
 			panic("watcher current revision should not exceed current revision")
 		}
-		if w.cur < compactRev {
+		if w.minRev < compactRev {
 			select {
 			case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactRev}:
+				w.compacted = true
 				wg.delete(w)
 			default:
 				// retry next time
 			}
 			continue
 		}
-		if minRev > w.cur {
-			minRev = w.cur
+		if minRev > w.minRev {
+			minRev = w.minRev
 		}
 	}
 	return minRev
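
Finally, a runnable sketch of the chooseAll scan above (chooseAllSketch and its watcher type are hypothetical stand-ins): watchers behind the compact revision are notified, flagged compacted, and dropped, while the rest contribute to the minimum revision the next sync pass must read from. Note the flag is set only after the send succeeds, so a concurrent cancel never treats an un-notified watcher as compacted.

package main

import (
	"fmt"
	"math"
)

type watcher struct {
	ch        chan int64 // receives the compact revision
	minRev    int64
	compacted bool
}

func chooseAllSketch(ws map[*watcher]bool, curRev, compactRev int64) int64 {
	minRev := int64(math.MaxInt64)
	for w := range ws {
		if w.minRev > curRev {
			panic("watcher minRev should not exceed current revision")
		}
		if w.minRev < compactRev {
			select {
			case w.ch <- compactRev:
				// only mark compacted once the watcher has been told
				w.compacted = true
				delete(ws, w) // deleting during range is safe in Go
			default:
				// channel full: leave the watcher in place; retry next pass
			}
			continue
		}
		if minRev > w.minRev {
			minRev = w.minRev
		}
	}
	return minRev
}

func main() {
	a := &watcher{ch: make(chan int64, 1), minRev: 2}
	b := &watcher{ch: make(chan int64, 1), minRev: 5}
	c := &watcher{ch: make(chan int64, 1), minRev: 8}
	ws := map[*watcher]bool{a: true, b: true, c: true}

	fmt.Println("next sync starts at rev:", chooseAllSketch(ws, 10, 4)) // 5
	fmt.Println("a compacted:", a.compacted, "notified at:", <-a.ch)    // true 4
}
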