etcd/raft/quorum/majority.go
Tobias Schottdorf b67303c6a2 raft: allow use of joint quorums
This change introduces joint quorums by changing the Node and RawNode
API to accept pb.ConfChangeV2 (on top of pb.ConfChange).

pb.ConfChange continues to work as today: it allows carrying out a
single configuration change. A pb.ConfChange proposal gets added to
the Raft log as such and is thus also observed by the app during Ready
handling, and fed back to ApplyConfChange.

ConfChangeV2 allows joint configuration changes but will continue to
carry out configuration changes in "one phase" (i.e. without ever
entering a joint config) when this is possible.
2019-07-23 10:40:03 +02:00

213 lines
5.5 KiB
Go

// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package quorum
import (
"fmt"
"math"
"sort"
"strings"
)
// MajorityConfig is a set of IDs that uses majority quorums to make decisions.
type MajorityConfig map[uint64]struct{}
func (c MajorityConfig) String() string {
sl := make([]uint64, 0, len(c))
for id := range c {
sl = append(sl, id)
}
sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] })
var buf strings.Builder
buf.WriteByte('(')
for i := range sl {
if i > 0 {
buf.WriteByte(' ')
}
fmt.Fprint(&buf, sl[i])
}
buf.WriteByte(')')
return buf.String()
}
// Describe returns a (multi-line) representation of the commit indexes for the
// given lookuper.
func (c MajorityConfig) Describe(l AckedIndexer) string {
if len(c) == 0 {
return "<empty majority quorum>"
}
type tup struct {
id uint64
idx Index
ok bool // idx found?
bar int // length of bar displayed for this tup
}
// Below, populate .bar so that the i-th largest commit index has bar i (we
// plot this as sort of a progress bar). The actual code is a bit more
// complicated and also makes sure that equal index => equal bar.
n := len(c)
info := make([]tup, 0, n)
for id := range c {
idx, ok := l.AckedIndex(id)
info = append(info, tup{id: id, idx: idx, ok: ok})
}
// Sort by index
sort.Slice(info, func(i, j int) bool {
if info[i].idx == info[j].idx {
return info[i].id < info[j].id
}
return info[i].idx < info[j].idx
})
// Populate .bar.
for i := range info {
if i > 0 && info[i-1].idx < info[i].idx {
info[i].bar = i
}
}
// Sort by ID.
sort.Slice(info, func(i, j int) bool {
return info[i].id < info[j].id
})
var buf strings.Builder
// Print.
fmt.Fprint(&buf, strings.Repeat(" ", n)+" idx\n")
for i := range info {
bar := info[i].bar
if !info[i].ok {
fmt.Fprint(&buf, "?"+strings.Repeat(" ", n))
} else {
fmt.Fprint(&buf, strings.Repeat("x", bar)+">"+strings.Repeat(" ", n-bar))
}
fmt.Fprintf(&buf, " %5d (id=%d)\n", info[i].idx, info[i].id)
}
return buf.String()
}
// Slice returns the MajorityConfig as a sorted slice.
func (c MajorityConfig) Slice() []uint64 {
var sl []uint64
for id := range c {
sl = append(sl, id)
}
sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] })
return sl
}
type uint64Slice []uint64
func insertionSort(sl uint64Slice) {
a, b := 0, len(sl)
for i := a + 1; i < b; i++ {
for j := i; j > a && sl[j] < sl[j-1]; j-- {
sl[j], sl[j-1] = sl[j-1], sl[j]
}
}
}
// CommittedIndex computes the committed index from those supplied via the
// provided AckedIndexer (for the active config).
func (c MajorityConfig) CommittedIndex(l AckedIndexer) Index {
n := len(c)
if n == 0 {
// This plays well with joint quorums which, when one half is the zero
// MajorityConfig, should behave like the other half.
return math.MaxUint64
}
// Use an on-stack slice to collect the committed indexes when n <= 7
// (otherwise we alloc). The alternative is to stash a slice on
// MajorityConfig, but this impairs usability (as is, MajorityConfig is just
// a map, and that's nice). The assumption is that running with a
// replication factor of >7 is rare, and in cases in which it happens
// performance is a lesser concern (additionally the performance
// implications of an allocation here are far from drastic).
var stk [7]uint64
srt := uint64Slice(stk[:])
if cap(srt) < n {
srt = make([]uint64, n)
}
srt = srt[:n]
{
// Fill the slice with the indexes observed. Any unused slots will be
// left as zero; these correspond to voters that may report in, but
// haven't yet. We fill from the right (since the zeroes will end up on
// the left after sorting below anyway).
i := n - 1
for id := range c {
if idx, ok := l.AckedIndex(id); ok {
srt[i] = uint64(idx)
i--
}
}
}
// Sort by index. Use a bespoke algorithm (copied from the stdlib's sort
// package) to keep srt on the stack.
insertionSort(srt)
// The smallest index into the array for which the value is acked by a
// quorum. In other words, from the end of the slice, move n/2+1 to the
// left (accounting for zero-indexing).
pos := n - (n/2 + 1)
return Index(srt[pos])
}
// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
// a result indicating whether the vote is pending (i.e. neither a quorum of
// yes/no has been reached), won (a quorum of yes has been reached), or lost (a
// quorum of no has been reached).
func (c MajorityConfig) VoteResult(votes map[uint64]bool) VoteResult {
if len(c) == 0 {
// By convention, the elections on an empty config win. This comes in
// handy with joint quorums because it'll make a half-populated joint
// quorum behave like a majority quorum.
return VoteWon
}
ny := [2]int{} // vote counts for no and yes, respectively
var missing int
for id := range c {
v, ok := votes[id]
if !ok {
missing++
continue
}
if v {
ny[1]++
} else {
ny[0]++
}
}
q := len(c)/2 + 1
if ny[1] >= q {
return VoteWon
}
if ny[1]+missing >= q {
return VotePending
}
return VoteLost
}