mvcc: tighten up watcher cancelation and revision handling

Replaces w.cur with w.minRev, the minimum revision of the next update a
watcher will accept, and retries cancelation when the watcher can't be
found in any watcher group (because it is temporarily held by
moveVictims).

Fixes: #5459
Anthony Romano, 2016-05-26 16:10:58 -06:00
commit cfb3f96c2b (parent c438310634)
3 changed files with 76 additions and 48 deletions
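The invariant behind the rename, as a minimal self-contained Go sketch (the watcher type and helpers here are illustrative stand-ins, not the etcd code): a watcher's minRev is the first revision it has not yet observed, so an event is deliverable exactly when its ModRevision is at least minRev, and a watcher that has observed everything through rev carries minRev = rev + 1.

    package main

    import "fmt"

    // watcher mirrors just the revision bookkeeping of the mvcc watcher:
    // minRev is the minimum revision of the next update it will accept.
    type watcher struct {
        minRev int64
    }

    // accept reports whether an event at modRev should be delivered,
    // mirroring the "ev.Kv.ModRevision >= w.minRev" check in newWatcherBatch.
    func (w *watcher) accept(modRev int64) bool { return modRev >= w.minRev }

    // observe marks every revision through rev as seen, as syncWatchers
    // does with "w.minRev = curRev + 1".
    func (w *watcher) observe(rev int64) { w.minRev = rev + 1 }

    func main() {
        w := &watcher{minRev: 5} // has observed revisions 1..4
        fmt.Println(w.accept(4)) // false: already observed
        fmt.Println(w.accept(5)) // true: first unseen revision
        w.observe(9)
        // a response for this watcher would report Revision = minRev - 1
        fmt.Println(w.minRev - 1) // 9
    }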

mvcc/watchable_store.go

@@ -192,7 +192,7 @@ func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch c
     wa := &watcher{
         key: key,
         end: end,
-        cur: startRev,
+        minRev: startRev,
         id:  id,
         ch:  ch,
     }
@@ -200,9 +200,9 @@ func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch c
     s.store.mu.Lock()
     synced := startRev > s.store.currentRev.main || startRev == 0
     if synced {
-        wa.cur = s.store.currentRev.main + 1
-        if startRev > wa.cur {
-            wa.cur = startRev
+        wa.minRev = s.store.currentRev.main + 1
+        if startRev > wa.minRev {
+            wa.minRev = startRev
         }
     }
     s.store.mu.Unlock()
@@ -214,30 +214,47 @@ func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch c
     }
     watcherGauge.Inc()

-    cancel := cancelFunc(func() {
-        s.mu.Lock()
-        // remove references of the watcher
-        if s.unsynced.delete(wa) {
-            slowWatcherGauge.Dec()
-            watcherGauge.Dec()
-        } else if s.synced.delete(wa) {
-            watcherGauge.Dec()
-        } else {
-            for _, wb := range s.victims {
-                if wb[wa] != nil {
-                    slowWatcherGauge.Dec()
-                    watcherGauge.Dec()
-                    delete(wb, wa)
-                    break
-                }
-            }
-        }
-        // If we cannot find it, it should have finished watch.
-        s.mu.Unlock()
-    })
-
-    return wa, cancel
+    return wa, func() { s.cancelWatcher(wa) }
+}
+
+// cancelWatcher removes references of the watcher from the watchableStore
+func (s *watchableStore) cancelWatcher(wa *watcher) {
+    for {
+        s.mu.Lock()
+
+        if s.unsynced.delete(wa) {
+            slowWatcherGauge.Dec()
+            break
+        } else if s.synced.delete(wa) {
+            break
+        } else if wa.compacted {
+            break
+        }
+
+        if !wa.victim {
+            panic("watcher not victim but not in watch groups")
+        }
+
+        var victimBatch watcherBatch
+        for _, wb := range s.victims {
+            if wb[wa] != nil {
+                victimBatch = wb
+                break
+            }
+        }
+        if victimBatch != nil {
+            slowWatcherGauge.Dec()
+            delete(victimBatch, wa)
+            break
+        }
+
+        // victim being processed so not accessible; retry
+        s.mu.Unlock()
+        time.Sleep(time.Millisecond)
+    }
+
+    watcherGauge.Dec()
+    s.mu.Unlock()
 }

 // syncWatchersLoop syncs the watcher in the unsynced map every 100ms.
@@ -306,8 +323,10 @@ func (s *watchableStore) moveVictims() (moved int) {
     for _, wb := range victims {
         // try to send responses again
         for w, eb := range wb {
+            // watcher has observed the store up to, but not including, w.minRev
+            rev := w.minRev - 1
             select {
-            case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: w.cur}:
+            case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}:
                 pendingEventsGauge.Add(float64(len(eb.evs)))
             default:
                 if newVictim == nil {
@@ -328,10 +347,11 @@ func (s *watchableStore) moveVictims() (moved int) {
                 // couldn't send watch response; stays victim
                 continue
             }
+            w.victim = false
             if eb.moreRev != 0 {
-                w.cur = eb.moreRev
+                w.minRev = eb.moreRev
             }
-            if w.cur < curRev {
+            if w.minRev <= curRev {
                 s.unsynced.add(w)
             } else {
                 slowWatcherGauge.Dec()
@@ -385,17 +405,20 @@ func (s *watchableStore) syncWatchers() {
     var victims watcherBatch
     wb := newWatcherBatch(wg, evs)
     for w := range wg.watchers {
+        w.minRev = curRev + 1
         eb, ok := wb[w]
         if !ok {
             // bring un-notified watcher to synced
-            w.cur = curRev
             s.synced.add(w)
             s.unsynced.delete(w)
             continue
         }
-        w.cur = curRev
-        isBlocked := false
+
+        if eb.moreRev != 0 {
+            w.minRev = eb.moreRev
+        }
+
         select {
         case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: curRev}:
             pendingEventsGauge.Add(float64(len(eb.evs)))
@@ -403,14 +426,14 @@ func (s *watchableStore) syncWatchers() {
             if victims == nil {
                 victims = make(watcherBatch)
             }
-            isBlocked = true
+            w.victim = true
         }
-        if isBlocked {
+
+        if w.victim {
             victims[w] = eb
         } else {
             if eb.moreRev != 0 {
-                w.cur = eb.moreRev
+                // stay unsynced; more to read
                 continue
             }
             s.synced.add(w)
@@ -458,14 +481,15 @@ func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) {
             plog.Panicf("unexpected multiple revisions in notification")
         }
         select {
-        case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: s.Rev()}:
+        case w.ch <- WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}:
             pendingEventsGauge.Add(float64(len(eb.evs)))
         default:
             // move slow watcher to victims
-            w.cur = rev
+            w.minRev = rev + 1
             if victim == nil {
                 victim = make(watcherBatch)
             }
+            w.victim = true
             victim[w] = eb
             s.synced.delete(w)
             slowWatcherGauge.Inc()
@@ -508,11 +532,14 @@ type watcher struct {
     // If end is set, the watcher is on a range.
     end []byte

-    // cur is the current watcher revision of a unsynced watcher.
-    // cur will be updated for unsynced watcher while it is catching up.
-    // cur is startRev of a synced watcher.
-    // cur will not be updated for synced watcher.
-    cur int64
+    // victim is set when ch is blocked and undergoing victim processing
+    victim bool
+
+    // compacted is set when the watcher is removed because of compaction
+    compacted bool
+
+    // minRev is the minimum revision update the watcher will accept
+    minRev int64

     id WatchID
     // a chan to send out the watch response.
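The retry loop in the new cancelWatcher exists because moveVictims detaches a victim batch from s.victims while it retries the sends, so a canceled watcher can be momentarily unfindable under the lock. A hedged, self-contained sketch of that pattern (the registry type and names are illustrative, not etcd's):

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    // registry stands in for the store's synced/unsynced/victim bookkeeping:
    // a watcher can be canceled only while it is findable here.
    type registry struct {
        mu      sync.Mutex
        members map[string]bool
    }

    // cancel retries until the watcher is findable, mirroring cancelWatcher:
    // if a concurrent pass (moveVictims in etcd) has checked the watcher out,
    // drop the lock, sleep briefly, and look again.
    func (r *registry) cancel(id string) {
        for {
            r.mu.Lock()
            if r.members[id] {
                delete(r.members, id)
                r.mu.Unlock()
                return
            }
            r.mu.Unlock()
            time.Sleep(time.Millisecond)
        }
    }

    func main() {
        r := &registry{members: map[string]bool{}}
        // stand-in for moveVictims returning the watcher to a findable set
        go func() {
            time.Sleep(5 * time.Millisecond)
            r.mu.Lock()
            r.members["w1"] = true
            r.mu.Unlock()
        }()
        r.cancel("w1") // spins until the concurrent pass hands the watcher back
        fmt.Println("canceled")
    }

The millisecond sleep between attempts keeps the canceler from busy-spinning while the concurrent pass holds the watcher.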

mvcc/watchable_store_test.go

@@ -193,8 +193,8 @@ func TestSyncWatchers(t *testing.T) {
     }

     for w := range sws {
-        if w.cur != s.Rev() {
-            t.Errorf("w.cur = %d, want %d", w.cur, s.Rev())
+        if w.minRev != s.Rev()+1 {
+            t.Errorf("w.minRev = %d, want %d", w.minRev, s.Rev()+1)
         }
     }

mvcc/watcher_group.go

@@ -81,7 +81,7 @@ func newWatcherBatch(wg *watcherGroup, evs []mvccpb.Event) watcherBatch {
     wb := make(watcherBatch)
     for _, ev := range evs {
         for w := range wg.watcherSetByKey(string(ev.Kv.Key)) {
-            if ev.Kv.ModRevision >= w.cur {
+            if ev.Kv.ModRevision >= w.minRev {
                 // don't double notify
                 wb.add(w, ev)
             }
@@ -233,20 +233,21 @@ func (wg *watcherGroup) choose(maxWatchers int, curRev, compactRev int64) (*watc
 func (wg *watcherGroup) chooseAll(curRev, compactRev int64) int64 {
     minRev := int64(math.MaxInt64)
     for w := range wg.watchers {
-        if w.cur > curRev {
+        if w.minRev > curRev {
             panic("watcher current revision should not exceed current revision")
         }
-        if w.cur < compactRev {
+        if w.minRev < compactRev {
             select {
             case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactRev}:
+                w.compacted = true
                 wg.delete(w)
             default:
                 // retry next time
             }
             continue
         }
-        if minRev > w.cur {
-            minRev = w.cur
+        if minRev > w.minRev {
+            minRev = w.minRev
         }
     }
     return minRev
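The chooseAll change keeps the same cutoff logic while marking compacted watchers before deleting them: a watcher whose minRev predates the compact revision can never be satisfied and is told the compact revision instead, while the remaining watchers determine the oldest revision the next sync pass must read. An illustrative sketch under assumed names (not etcd's API):

    package main

    import (
        "fmt"
        "math"
    )

    // chooseAll mirrors the revision arithmetic of watcherGroup.chooseAll:
    // watchers below the compaction point are reported as compacted; the
    // rest yield the minimum revision the sync pass must start from.
    func chooseAll(minRevs []int64, curRev, compactRev int64) (syncFrom int64, compacted []int64) {
        syncFrom = int64(math.MaxInt64)
        for _, mr := range minRevs {
            if mr > curRev {
                // unsynced watchers always lag the store
                panic("watcher minimum revision should not exceed current revision")
            }
            if mr < compactRev {
                // wants revisions already compacted away; would be sent CompactRevision
                compacted = append(compacted, mr)
                continue
            }
            if mr < syncFrom {
                syncFrom = mr
            }
        }
        return syncFrom, compacted
    }

    func main() {
        syncFrom, compacted := chooseAll([]int64{3, 8, 12}, 20, 5)
        fmt.Println(syncFrom, compacted) // 8 [3]
    }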