mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
raft: internally support joint consensus
This commit introduces machinery to safely apply joint consensus configuration changes to Raft. The main contribution is the new package, `confchange`, which offers the primitives `Simple`, `EnterJoint`, and `LeaveJoint`. The first two take a list of configuration changes. `Simple` only declares success if these configuration changes (applied atomically) change the set of voters by at most one (i.e. it's fine to add or remove any number of learners, but change only one voter). `EnterJoint` makes the configuration joint and then applies the changes to it, in preparation of the caller returning later and transitioning out of the joint config into the final desired configuration via `LeaveJoint()`. This commit streamlines the conversion between voters and learners, which is now generally allowed whenever the above conditions are upheld (i.e. it's not possible to demote a voter and add a new voter in the context of a Simple configuration change, but it is possible via EnterJoint). Previously, we had the artificial restriction that a voter could not be demoted to a learner, but had to be removed first. Even though demoting a learner is generally less useful than promoting a learner (the latter is used to catch up future voters), demotions could see use in improved handling of temporary node unavailability, where it is desired to remove voting power from a down node, but to preserve its data should it return. An additional change that was made in this commit is to prevent the use of empty commit quorums, which was previously possible but for no good reason; this: Closes #10884. The work left to do in a future PR is to actually expose joint configurations to the applications using Raft. This will entail mostly API design and the addition of suitable testing, which to be carried out ergonomically is likely to motivate a larger refactor. Touches #7625.
This commit is contained in:
parent
5a734e79f5
commit
aa158f36b9
420
raft/confchange/confchange.go
Normal file
420
raft/confchange/confchange.go
Normal file
@ -0,0 +1,420 @@
|
||||
// Copyright 2019 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package confchange
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"go.etcd.io/etcd/raft/quorum"
|
||||
pb "go.etcd.io/etcd/raft/raftpb"
|
||||
"go.etcd.io/etcd/raft/tracker"
|
||||
)
|
||||
|
||||
// Changer facilitates configuration changes. It exposes methods to handle
|
||||
// simple and joint consensus while performing the proper validation that allows
|
||||
// refusing invalid configuration changes before they affect the active
|
||||
// configuration.
|
||||
type Changer struct {
|
||||
Tracker tracker.ProgressTracker
|
||||
LastIndex uint64
|
||||
}
|
||||
|
||||
// EnterJoint verifies that the outgoing (=right) majority config of the joint
|
||||
// config is empty and initializes it with a copy of the incoming (=left)
|
||||
// majority config. That is, it transitions from
|
||||
//
|
||||
// (1 2 3)&&()
|
||||
// to
|
||||
// (1 2 3)&&(1 2 3).
|
||||
//
|
||||
// The supplied ConfChanges are then applied to the incoming majority config,
|
||||
// resulting in a joint configuration that in terms of the Raft thesis[1]
|
||||
// (Section 4.3) corresponds to `C_{new,old}`.
|
||||
//
|
||||
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
|
||||
func (c Changer) EnterJoint(ccs ...pb.ConfChange) (tracker.Config, tracker.ProgressMap, error) {
|
||||
cfg, prs, err := c.checkAndCopy()
|
||||
if err != nil {
|
||||
return c.err(err)
|
||||
}
|
||||
if joint(cfg) {
|
||||
err := errors.New("config is already joint")
|
||||
return c.err(err)
|
||||
}
|
||||
if len(incoming(cfg.Voters)) == 0 {
|
||||
// We allow adding nodes to an empty config for convenience (testing and
|
||||
// bootstrap), but you can't enter a joint state.
|
||||
err := errors.New("can't make a zero-voter config joint")
|
||||
return c.err(err)
|
||||
}
|
||||
// Clear the outgoing config.
|
||||
{
|
||||
*outgoingPtr(&cfg.Voters) = quorum.MajorityConfig{}
|
||||
|
||||
}
|
||||
// Copy incoming to outgoing.
|
||||
for id := range incoming(cfg.Voters) {
|
||||
outgoing(cfg.Voters)[id] = struct{}{}
|
||||
}
|
||||
|
||||
if err := c.apply(&cfg, prs, ccs...); err != nil {
|
||||
return c.err(err)
|
||||
}
|
||||
|
||||
return checkAndReturn(cfg, prs)
|
||||
}
|
||||
|
||||
// LeaveJoint transitions out of a joint configuration. It is an error to call
|
||||
// this method if the configuration is not joint, i.e. if the outgoing majority
|
||||
// config Voters[1] is empty.
|
||||
//
|
||||
// The outgoing majority config of the joint configuration will be removed,
|
||||
// that is, the incoming config is promoted as the sole decision maker. In the
|
||||
// notation of the Raft thesis[1] (Section 4.3), this method transitions from
|
||||
// `C_{new,old}` into `C_new`.
|
||||
//
|
||||
// At the same time, any staged learners (LearnersNext) the addition of which
|
||||
// was held back by an overlapping voter in the former outgoing config will be
|
||||
// inserted into Learners.
|
||||
//
|
||||
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
|
||||
func (c Changer) LeaveJoint() (tracker.Config, tracker.ProgressMap, error) {
|
||||
cfg, prs, err := c.checkAndCopy()
|
||||
if err != nil {
|
||||
return c.err(err)
|
||||
}
|
||||
if !joint(cfg) {
|
||||
err := errors.New("can't leave a non-joint config")
|
||||
return c.err(err)
|
||||
}
|
||||
if len(outgoing(cfg.Voters)) == 0 {
|
||||
err := fmt.Errorf("configuration is not joint: %v", cfg)
|
||||
return c.err(err)
|
||||
}
|
||||
for id := range cfg.LearnersNext {
|
||||
nilAwareAdd(&cfg.Learners, id)
|
||||
prs[id].IsLearner = true
|
||||
}
|
||||
cfg.LearnersNext = nil
|
||||
|
||||
for id := range outgoing(cfg.Voters) {
|
||||
_, isVoter := incoming(cfg.Voters)[id]
|
||||
_, isLearner := cfg.Learners[id]
|
||||
|
||||
if !isVoter && !isLearner {
|
||||
delete(prs, id)
|
||||
}
|
||||
}
|
||||
*outgoingPtr(&cfg.Voters) = nil
|
||||
|
||||
return checkAndReturn(cfg, prs)
|
||||
}
|
||||
|
||||
// Simple carries out a series of configuration changes that (in aggregate)
|
||||
// mutates the incoming majority config Voters[0] by at most one. This method
|
||||
// will return an error if that is not the case, if the resulting quorum is
|
||||
// zero, or if the configuration is in a joint state (i.e. if there is an
|
||||
// outgoing configuration).
|
||||
func (c Changer) Simple(ccs ...pb.ConfChange) (tracker.Config, tracker.ProgressMap, error) {
|
||||
cfg, prs, err := c.checkAndCopy()
|
||||
if err != nil {
|
||||
return c.err(err)
|
||||
}
|
||||
if joint(cfg) {
|
||||
err := errors.New("can't apply simple config change in joint config")
|
||||
return c.err(err)
|
||||
}
|
||||
if err := c.apply(&cfg, prs, ccs...); err != nil {
|
||||
return c.err(err)
|
||||
}
|
||||
if n := symdiff(incoming(c.Tracker.Voters), incoming(cfg.Voters)); n > 1 {
|
||||
return tracker.Config{}, nil, errors.New("more than voter changed without entering joint config")
|
||||
}
|
||||
if err := checkInvariants(cfg, prs); err != nil {
|
||||
return tracker.Config{}, tracker.ProgressMap{}, nil
|
||||
}
|
||||
|
||||
return checkAndReturn(cfg, prs)
|
||||
}
|
||||
|
||||
// apply a ConfChange to the configuration. By convention, changes to voters are
|
||||
// always made to the incoming majority config Voters[0]. Voters[1] is either
|
||||
// empty or preserves the outgoing majority configuration while in a joint state.
|
||||
func (c Changer) apply(cfg *tracker.Config, prs tracker.ProgressMap, ccs ...pb.ConfChange) error {
|
||||
for _, cc := range ccs {
|
||||
if cc.NodeID == 0 {
|
||||
// etcd replaces the NodeID with zero if it decides (downstream of
|
||||
// raft) to not apply a ConfChange, so we have to have explicit code
|
||||
// here to ignore these.
|
||||
continue
|
||||
}
|
||||
switch cc.Type {
|
||||
case pb.ConfChangeAddNode:
|
||||
c.makeVoter(cfg, prs, cc.NodeID)
|
||||
case pb.ConfChangeAddLearnerNode:
|
||||
c.makeLearner(cfg, prs, cc.NodeID)
|
||||
case pb.ConfChangeRemoveNode:
|
||||
c.remove(cfg, prs, cc.NodeID)
|
||||
case pb.ConfChangeUpdateNode:
|
||||
default:
|
||||
return fmt.Errorf("unexpected conf type %d", cc.Type)
|
||||
}
|
||||
}
|
||||
if len(incoming(cfg.Voters)) == 0 {
|
||||
return errors.New("removed all voters")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// makeVoter adds or promotes the given ID to be a voter in the incoming
|
||||
// majority config.
|
||||
func (c Changer) makeVoter(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) {
|
||||
pr := prs[id]
|
||||
if pr == nil {
|
||||
c.initProgress(cfg, prs, id, false /* isLearner */)
|
||||
return
|
||||
}
|
||||
|
||||
pr.IsLearner = false
|
||||
nilAwareDelete(&cfg.Learners, id)
|
||||
nilAwareDelete(&cfg.LearnersNext, id)
|
||||
incoming(cfg.Voters)[id] = struct{}{}
|
||||
return
|
||||
}
|
||||
|
||||
// makeLearner makes the given ID a learner or stages it to be a learner once
|
||||
// an active joint configuration is exited.
|
||||
//
|
||||
// The former happens when the peer is not a part of the outgoing config, in
|
||||
// which case we either add a new learner or demote a voter in the incoming
|
||||
// config.
|
||||
//
|
||||
// The latter case occurs when the configuration is joint and the peer is a
|
||||
// voter in the outgoing config. In that case, we do not want to add the peer
|
||||
// as a learner because then we'd have to track a peer as a voter and learner
|
||||
// simultaneously. Instead, we add the learner to LearnersNext, so that it will
|
||||
// be added to Learners the moment the outgoing config is removed by
|
||||
// LeaveJoint().
|
||||
func (c Changer) makeLearner(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) {
|
||||
pr := prs[id]
|
||||
if pr == nil {
|
||||
c.initProgress(cfg, prs, id, true /* isLearner */)
|
||||
return
|
||||
}
|
||||
if pr.IsLearner {
|
||||
return
|
||||
}
|
||||
// Remove any existing voter in the incoming config...
|
||||
c.remove(cfg, prs, id)
|
||||
// ... but save the Progress.
|
||||
prs[id] = pr
|
||||
// Use LearnersNext if we can't add the learner to Learners directly, i.e.
|
||||
// if the peer is still tracked as a voter in the outgoing config. It will
|
||||
// be turned into a learner in LeaveJoint().
|
||||
//
|
||||
// Otherwise, add a regular learner right away.
|
||||
if _, onRight := outgoing(cfg.Voters)[id]; onRight {
|
||||
nilAwareAdd(&cfg.LearnersNext, id)
|
||||
} else {
|
||||
pr.IsLearner = true
|
||||
nilAwareAdd(&cfg.Learners, id)
|
||||
}
|
||||
}
|
||||
|
||||
// remove this peer as a voter or learner from the incoming config.
|
||||
func (c Changer) remove(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) {
|
||||
if _, ok := prs[id]; !ok {
|
||||
return
|
||||
}
|
||||
|
||||
delete(incoming(cfg.Voters), id)
|
||||
nilAwareDelete(&cfg.Learners, id)
|
||||
nilAwareDelete(&cfg.LearnersNext, id)
|
||||
|
||||
// If the peer is still a voter in the outgoing config, keep the Progress.
|
||||
if _, onRight := outgoing(cfg.Voters)[id]; !onRight {
|
||||
delete(prs, id)
|
||||
}
|
||||
}
|
||||
|
||||
// initProgress initializes a new progress for the given node or learner.
|
||||
func (c Changer) initProgress(cfg *tracker.Config, prs tracker.ProgressMap, id uint64, isLearner bool) {
|
||||
if !isLearner {
|
||||
incoming(cfg.Voters)[id] = struct{}{}
|
||||
} else {
|
||||
nilAwareAdd(&cfg.Learners, id)
|
||||
}
|
||||
prs[id] = &tracker.Progress{
|
||||
// We initialize Progress.Next with lastIndex+1 so that the peer will be
|
||||
// probed without an index first.
|
||||
//
|
||||
// TODO(tbg): verify that, this is just my best guess.
|
||||
Next: c.LastIndex + 1,
|
||||
Match: 0,
|
||||
Inflights: tracker.NewInflights(c.Tracker.MaxInflight),
|
||||
IsLearner: isLearner,
|
||||
// When a node is first added, we should mark it as recently active.
|
||||
// Otherwise, CheckQuorum may cause us to step down if it is invoked
|
||||
// before the added node has had a chance to communicate with us.
|
||||
RecentActive: true,
|
||||
}
|
||||
}
|
||||
|
||||
// checkInvariants makes sure that the config and progress are compatible with
|
||||
// each other. This is used to check both what the Changer is initialized with,
|
||||
// as well as what it returns.
|
||||
func checkInvariants(cfg tracker.Config, prs tracker.ProgressMap) error {
|
||||
// NB: intentionally allow the empty config. In production we'll never see a
|
||||
// non-empty config (we prevent it from being created) but we will need to
|
||||
// be able to *create* an initial config, for example during bootstrap (or
|
||||
// during tests). Instead of having to hand-code this, we allow
|
||||
// transitioning from an empty config into any other legal and non-empty
|
||||
// config.
|
||||
for _, ids := range []map[uint64]struct{}{
|
||||
cfg.Voters.IDs(),
|
||||
cfg.Learners,
|
||||
cfg.LearnersNext,
|
||||
} {
|
||||
for id := range ids {
|
||||
if _, ok := prs[id]; !ok {
|
||||
return fmt.Errorf("no progress for %d", id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any staged learner was staged because it could not be directly added due
|
||||
// to a conflicting voter in the outgoing config.
|
||||
for id := range cfg.LearnersNext {
|
||||
if _, ok := outgoing(cfg.Voters)[id]; !ok {
|
||||
return fmt.Errorf("%d is in LearnersNext, but not Voters[1]", id)
|
||||
}
|
||||
if prs[id].IsLearner {
|
||||
return fmt.Errorf("%d is in LearnersNext, but is already marked as learner", id)
|
||||
}
|
||||
}
|
||||
// Conversely Learners and Voters doesn't intersect at all.
|
||||
for id := range cfg.Learners {
|
||||
if _, ok := outgoing(cfg.Voters)[id]; ok {
|
||||
return fmt.Errorf("%d is in Learners and Voters[1]", id)
|
||||
}
|
||||
if _, ok := incoming(cfg.Voters)[id]; ok {
|
||||
return fmt.Errorf("%d is in Learners and Voters[0]", id)
|
||||
}
|
||||
if !prs[id].IsLearner {
|
||||
return fmt.Errorf("%d is in Learners, but is not marked as learner", id)
|
||||
}
|
||||
}
|
||||
|
||||
if !joint(cfg) {
|
||||
// We enforce that empty maps are nil instead of zero.
|
||||
if outgoing(cfg.Voters) != nil {
|
||||
return fmt.Errorf("Voters[1] must be nil when not joint")
|
||||
}
|
||||
if cfg.LearnersNext != nil {
|
||||
return fmt.Errorf("LearnersNext must be nil when not joint")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkAndCopy copies the tracker's config and progress map (deeply enough for
|
||||
// the purposes of the Changer) and returns those copies. It returns an error
|
||||
// if checkInvariants does.
|
||||
func (c Changer) checkAndCopy() (tracker.Config, tracker.ProgressMap, error) {
|
||||
cfg := c.Tracker.Config.Clone()
|
||||
prs := tracker.ProgressMap{}
|
||||
|
||||
for id, pr := range c.Tracker.Progress {
|
||||
// A shallow copy is enough because we only mutate the Learner field.
|
||||
ppr := *pr
|
||||
prs[id] = &ppr
|
||||
}
|
||||
return checkAndReturn(cfg, prs)
|
||||
}
|
||||
|
||||
// checkAndReturn calls checkInvariants on the input and returns either the
|
||||
// resulting error or the input.
|
||||
func checkAndReturn(cfg tracker.Config, prs tracker.ProgressMap) (tracker.Config, tracker.ProgressMap, error) {
|
||||
if err := checkInvariants(cfg, prs); err != nil {
|
||||
return tracker.Config{}, tracker.ProgressMap{}, err
|
||||
}
|
||||
return cfg, prs, nil
|
||||
}
|
||||
|
||||
// err returns zero values and an error.
|
||||
func (c Changer) err(err error) (tracker.Config, tracker.ProgressMap, error) {
|
||||
return tracker.Config{}, nil, err
|
||||
}
|
||||
|
||||
// nilAwareAdd populates a map entry, creating the map if necessary.
|
||||
func nilAwareAdd(m *map[uint64]struct{}, id uint64) {
|
||||
if *m == nil {
|
||||
*m = map[uint64]struct{}{}
|
||||
}
|
||||
(*m)[id] = struct{}{}
|
||||
}
|
||||
|
||||
// nilAwareDelete deletes from a map, nil'ing the map itself if it is empty after.
|
||||
func nilAwareDelete(m *map[uint64]struct{}, id uint64) {
|
||||
if *m == nil {
|
||||
return
|
||||
}
|
||||
delete(*m, id)
|
||||
if len(*m) == 0 {
|
||||
*m = nil
|
||||
}
|
||||
}
|
||||
|
||||
// symdiff returns the count of the symmetric difference between the sets of
|
||||
// uint64s, i.e. len( (l - r) \union (r - l)).
|
||||
func symdiff(l, r map[uint64]struct{}) int {
|
||||
var n int
|
||||
pairs := [][2]quorum.MajorityConfig{
|
||||
{l, r}, // count elems in l but not in r
|
||||
{r, l}, // count elems in r but not in l
|
||||
}
|
||||
for _, p := range pairs {
|
||||
for id := range p[0] {
|
||||
if _, ok := p[1][id]; !ok {
|
||||
n++
|
||||
}
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func joint(cfg tracker.Config) bool {
|
||||
return len(outgoing(cfg.Voters)) > 0
|
||||
}
|
||||
|
||||
func incoming(voters quorum.JointConfig) quorum.MajorityConfig { return voters[0] }
|
||||
func outgoing(voters quorum.JointConfig) quorum.MajorityConfig { return voters[1] }
|
||||
func outgoingPtr(voters *quorum.JointConfig) *quorum.MajorityConfig { return &voters[1] }
|
||||
|
||||
// Describe prints the type and NodeID of the configuration changes as a
|
||||
// space-delimited string.
|
||||
func Describe(ccs ...pb.ConfChange) string {
|
||||
var buf strings.Builder
|
||||
for _, cc := range ccs {
|
||||
if buf.Len() > 0 {
|
||||
buf.WriteByte(' ')
|
||||
}
|
||||
fmt.Fprintf(&buf, "%s(%d)", cc.Type, cc.NodeID)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
105
raft/confchange/datadriven_test.go
Normal file
105
raft/confchange/datadriven_test.go
Normal file
@ -0,0 +1,105 @@
|
||||
// Copyright 2019 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package confchange
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/cockroachdb/datadriven"
|
||||
pb "go.etcd.io/etcd/raft/raftpb"
|
||||
"go.etcd.io/etcd/raft/tracker"
|
||||
)
|
||||
|
||||
func TestConfChangeDataDriven(t *testing.T) {
|
||||
datadriven.Walk(t, "testdata", func(t *testing.T, path string) {
|
||||
tr := tracker.MakeProgressTracker(10)
|
||||
c := Changer{
|
||||
Tracker: tr,
|
||||
LastIndex: 0, // incremented in this test with each cmd
|
||||
}
|
||||
|
||||
// The test files use the commands
|
||||
// - simple: run a simple conf change (i.e. no joint consensus),
|
||||
// - enter-joint: enter a joint config, and
|
||||
// - leave-joint: leave a joint config.
|
||||
// The first two take a list of config changes, which have the following
|
||||
// syntax:
|
||||
// - vn: make n a voter,
|
||||
// - ln: make n a learner,
|
||||
// - rn: remove n, and
|
||||
// - un: update n.
|
||||
datadriven.RunTest(t, path, func(d *datadriven.TestData) string {
|
||||
defer func() {
|
||||
c.LastIndex++
|
||||
}()
|
||||
var ccs []pb.ConfChange
|
||||
toks := strings.Split(strings.TrimSpace(d.Input), " ")
|
||||
if toks[0] == "" {
|
||||
toks = nil
|
||||
}
|
||||
for _, tok := range toks {
|
||||
if len(tok) < 2 {
|
||||
return fmt.Sprintf("unknown token %s", tok)
|
||||
}
|
||||
var cc pb.ConfChange
|
||||
switch tok[0] {
|
||||
case 'v':
|
||||
cc.Type = pb.ConfChangeAddNode
|
||||
case 'l':
|
||||
cc.Type = pb.ConfChangeAddLearnerNode
|
||||
case 'r':
|
||||
cc.Type = pb.ConfChangeRemoveNode
|
||||
case 'u':
|
||||
cc.Type = pb.ConfChangeUpdateNode
|
||||
default:
|
||||
return fmt.Sprintf("unknown input: %s", tok)
|
||||
}
|
||||
id, err := strconv.ParseUint(tok[1:], 10, 64)
|
||||
if err != nil {
|
||||
return err.Error()
|
||||
}
|
||||
cc.NodeID = id
|
||||
ccs = append(ccs, cc)
|
||||
}
|
||||
|
||||
var cfg tracker.Config
|
||||
var prs tracker.ProgressMap
|
||||
var err error
|
||||
switch d.Cmd {
|
||||
case "simple":
|
||||
cfg, prs, err = c.Simple(ccs...)
|
||||
case "enter-joint":
|
||||
cfg, prs, err = c.EnterJoint(ccs...)
|
||||
case "leave-joint":
|
||||
if len(ccs) > 0 {
|
||||
err = errors.New("this command takes no input")
|
||||
} else {
|
||||
cfg, prs, err = c.LeaveJoint()
|
||||
}
|
||||
default:
|
||||
return "unknown command"
|
||||
}
|
||||
if err != nil {
|
||||
return err.Error() + "\n"
|
||||
}
|
||||
c.Tracker.Config, c.Tracker.Progress = cfg, prs
|
||||
return fmt.Sprintf("%s\n%s", c.Tracker.Config, c.Tracker.Progress)
|
||||
})
|
||||
})
|
||||
}
|
168
raft/confchange/quick_test.go
Normal file
168
raft/confchange/quick_test.go
Normal file
@ -0,0 +1,168 @@
|
||||
// Copyright 2019 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package confchange
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"testing"
|
||||
"testing/quick"
|
||||
|
||||
pb "go.etcd.io/etcd/raft/raftpb"
|
||||
"go.etcd.io/etcd/raft/tracker"
|
||||
)
|
||||
|
||||
// TestConfChangeQuick uses quickcheck to verify that simple and joint config
|
||||
// changes arrive at the same result.
|
||||
func TestConfChangeQuick(t *testing.T) {
|
||||
cfg := &quick.Config{
|
||||
MaxCount: 1000,
|
||||
}
|
||||
|
||||
// Log the first couple of runs to give some indication of things working
|
||||
// as intended.
|
||||
const infoCount = 5
|
||||
|
||||
runWithJoint := func(c *Changer, ccs []pb.ConfChange) error {
|
||||
cfg, prs, err := c.EnterJoint(ccs...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.Tracker.Config = cfg
|
||||
c.Tracker.Progress = prs
|
||||
cfg, prs, err = c.LeaveJoint()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.Tracker.Config = cfg
|
||||
c.Tracker.Progress = prs
|
||||
return nil
|
||||
}
|
||||
|
||||
runWithSimple := func(c *Changer, ccs []pb.ConfChange) error {
|
||||
for _, cc := range ccs {
|
||||
cfg, prs, err := c.Simple(cc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.Tracker.Config, c.Tracker.Progress = cfg, prs
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type testFunc func(*Changer, []pb.ConfChange) error
|
||||
|
||||
wrapper := func(invoke testFunc) func(setup initialChanges, ccs confChanges) (*Changer, error) {
|
||||
return func(setup initialChanges, ccs confChanges) (*Changer, error) {
|
||||
tr := tracker.MakeProgressTracker(10)
|
||||
c := &Changer{
|
||||
Tracker: tr,
|
||||
LastIndex: 10,
|
||||
}
|
||||
|
||||
if err := runWithSimple(c, setup); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err := invoke(c, ccs)
|
||||
return c, err
|
||||
}
|
||||
}
|
||||
|
||||
var n int
|
||||
f1 := func(setup initialChanges, ccs confChanges) *Changer {
|
||||
c, err := wrapper(runWithSimple)(setup, ccs)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if n < infoCount {
|
||||
t.Log("initial setup:", Describe(setup...))
|
||||
t.Log("changes:", Describe(ccs...))
|
||||
t.Log(c.Tracker.Config)
|
||||
t.Log(c.Tracker.Progress)
|
||||
}
|
||||
n++
|
||||
return c
|
||||
}
|
||||
f2 := func(setup initialChanges, ccs confChanges) *Changer {
|
||||
c, err := wrapper(runWithJoint)(setup, ccs)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return c
|
||||
}
|
||||
err := quick.CheckEqual(f1, f2, cfg)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
cErr, ok := err.(*quick.CheckEqualError)
|
||||
if !ok {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
t.Error("setup:", Describe(cErr.In[0].([]pb.ConfChange)...))
|
||||
t.Error("ccs:", Describe(cErr.In[1].([]pb.ConfChange)...))
|
||||
t.Errorf("out1: %+v\nout2: %+v", cErr.Out1, cErr.Out2)
|
||||
}
|
||||
|
||||
type confChangeTyp pb.ConfChangeType
|
||||
|
||||
func (confChangeTyp) Generate(rand *rand.Rand, _ int) reflect.Value {
|
||||
return reflect.ValueOf(confChangeTyp(rand.Intn(4)))
|
||||
}
|
||||
|
||||
type confChanges []pb.ConfChange
|
||||
|
||||
func genCC(num func() int, id func() uint64, typ func() pb.ConfChangeType) []pb.ConfChange {
|
||||
var ccs []pb.ConfChange
|
||||
n := num()
|
||||
for i := 0; i < n; i++ {
|
||||
ccs = append(ccs, pb.ConfChange{Type: typ(), NodeID: id()})
|
||||
}
|
||||
return ccs
|
||||
}
|
||||
|
||||
func (confChanges) Generate(rand *rand.Rand, _ int) reflect.Value {
|
||||
num := func() int {
|
||||
return 1 + rand.Intn(9)
|
||||
}
|
||||
id := func() uint64 {
|
||||
// Note that num() >= 1, so we're never returning 1 from this method,
|
||||
// meaning that we'll never touch NodeID one, which is special to avoid
|
||||
// voterless configs altogether in this test.
|
||||
return 1 + uint64(num())
|
||||
}
|
||||
typ := func() pb.ConfChangeType {
|
||||
return pb.ConfChangeType(rand.Intn(len(pb.ConfChangeType_name)))
|
||||
}
|
||||
return reflect.ValueOf(genCC(num, id, typ))
|
||||
}
|
||||
|
||||
type initialChanges []pb.ConfChange
|
||||
|
||||
func (initialChanges) Generate(rand *rand.Rand, _ int) reflect.Value {
|
||||
num := func() int {
|
||||
return 1 + rand.Intn(5)
|
||||
}
|
||||
id := func() uint64 { return uint64(num()) }
|
||||
typ := func() pb.ConfChangeType {
|
||||
return pb.ConfChangeAddNode
|
||||
}
|
||||
// NodeID one is special - it's in the initial config and will be a voter
|
||||
// always (this is to avoid uninteresting edge cases where the simple conf
|
||||
// changes can't easily make progress).
|
||||
ccs := append([]pb.ConfChange{{Type: pb.ConfChangeAddNode, NodeID: 1}}, genCC(num, id, typ)...)
|
||||
return reflect.ValueOf(ccs)
|
||||
}
|
23
raft/confchange/testdata/joint_idempotency.txt
vendored
Normal file
23
raft/confchange/testdata/joint_idempotency.txt
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# Verify that operations upon entering the joint state are idempotent, i.e.
|
||||
# removing an absent node is fine, etc.
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
enter-joint
|
||||
r1 r2 r9 v2 v3 v4 v2 v3 v4 l2 l2 r4 r4 l1 l1
|
||||
----
|
||||
voters=(3)&&(1) learners=(2) learners_next=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2 learner
|
||||
3: StateProbe match=0 next=2
|
||||
|
||||
leave-joint
|
||||
----
|
||||
voters=(3) learners=(1 2)
|
||||
1: StateProbe match=0 next=1 learner
|
||||
2: StateProbe match=0 next=2 learner
|
||||
3: StateProbe match=0 next=2
|
24
raft/confchange/testdata/joint_learners_next.txt
vendored
Normal file
24
raft/confchange/testdata/joint_learners_next.txt
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
# Verify that when a voter is demoted in a joint config, it will show up in
|
||||
# learners_next until the joint config is left, and only then will the progress
|
||||
# turn into that of a learner, without resetting the progress. Note that this
|
||||
# last fact is verified by `next`, which can tell us which "round" the progress
|
||||
# was originally created in.
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
enter-joint
|
||||
v2 l1
|
||||
----
|
||||
voters=(2)&&(1) learners_next=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
|
||||
leave-joint
|
||||
----
|
||||
voters=(2) learners=(1)
|
||||
1: StateProbe match=0 next=1 learner
|
||||
2: StateProbe match=0 next=2
|
81
raft/confchange/testdata/joint_safety.txt
vendored
Normal file
81
raft/confchange/testdata/joint_safety.txt
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
leave-joint
|
||||
----
|
||||
can't leave a non-joint config
|
||||
|
||||
enter-joint
|
||||
----
|
||||
can't make a zero-voter config joint
|
||||
|
||||
enter-joint
|
||||
v1
|
||||
----
|
||||
can't make a zero-voter config joint
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=4
|
||||
|
||||
leave-joint
|
||||
----
|
||||
can't leave a non-joint config
|
||||
|
||||
# Can enter into joint config.
|
||||
enter-joint
|
||||
----
|
||||
voters=(1)&&(1)
|
||||
1: StateProbe match=0 next=4
|
||||
|
||||
enter-joint
|
||||
----
|
||||
config is already joint
|
||||
|
||||
leave-joint
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=4
|
||||
|
||||
leave-joint
|
||||
----
|
||||
can't leave a non-joint config
|
||||
|
||||
# Can enter again, this time with some ops.
|
||||
enter-joint
|
||||
r1 v2 v3 l4
|
||||
----
|
||||
voters=(2 3)&&(1) learners=(4)
|
||||
1: StateProbe match=0 next=4
|
||||
2: StateProbe match=0 next=10
|
||||
3: StateProbe match=0 next=10
|
||||
4: StateProbe match=0 next=10 learner
|
||||
|
||||
enter-joint
|
||||
----
|
||||
config is already joint
|
||||
|
||||
enter-joint
|
||||
v12
|
||||
----
|
||||
config is already joint
|
||||
|
||||
simple
|
||||
l15
|
||||
----
|
||||
can't apply simple config change in joint config
|
||||
|
||||
leave-joint
|
||||
----
|
||||
voters=(2 3) learners=(4)
|
||||
2: StateProbe match=0 next=10
|
||||
3: StateProbe match=0 next=10
|
||||
4: StateProbe match=0 next=10 learner
|
||||
|
||||
simple
|
||||
l9
|
||||
----
|
||||
voters=(2 3) learners=(4 9)
|
||||
2: StateProbe match=0 next=10
|
||||
3: StateProbe match=0 next=10
|
||||
4: StateProbe match=0 next=10 learner
|
||||
9: StateProbe match=0 next=15 learner
|
69
raft/confchange/testdata/simple_idempotency.txt
vendored
Normal file
69
raft/confchange/testdata/simple_idempotency.txt
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
simple
|
||||
v2
|
||||
----
|
||||
voters=(1 2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
l1
|
||||
----
|
||||
voters=(2) learners=(1)
|
||||
1: StateProbe match=0 next=1 learner
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
l1
|
||||
----
|
||||
voters=(2) learners=(1)
|
||||
1: StateProbe match=0 next=1 learner
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
r1
|
||||
----
|
||||
voters=(2)
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
r1
|
||||
----
|
||||
voters=(2)
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
v3
|
||||
----
|
||||
voters=(2 3)
|
||||
2: StateProbe match=0 next=3
|
||||
3: StateProbe match=0 next=8
|
||||
|
||||
simple
|
||||
r3
|
||||
----
|
||||
voters=(2)
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
r3
|
||||
----
|
||||
voters=(2)
|
||||
2: StateProbe match=0 next=3
|
||||
|
||||
simple
|
||||
r4
|
||||
----
|
||||
voters=(2)
|
||||
2: StateProbe match=0 next=3
|
60
raft/confchange/testdata/simple_promote_demote.txt
vendored
Normal file
60
raft/confchange/testdata/simple_promote_demote.txt
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
# Set up three voters for this test.
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
simple
|
||||
v2
|
||||
----
|
||||
voters=(1 2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
|
||||
simple
|
||||
v3
|
||||
----
|
||||
voters=(1 2 3)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
3: StateProbe match=0 next=3
|
||||
|
||||
# Can atomically demote and promote without a hitch.
|
||||
# This is pointless, but possible.
|
||||
simple
|
||||
l1 v1
|
||||
----
|
||||
voters=(1 2 3)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
3: StateProbe match=0 next=3
|
||||
|
||||
# Can demote a voter.
|
||||
simple
|
||||
l2
|
||||
----
|
||||
voters=(1 3) learners=(2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2 learner
|
||||
3: StateProbe match=0 next=3
|
||||
|
||||
# Can atomically promote and demote the same voter.
|
||||
# This is pointless, but possible.
|
||||
simple
|
||||
v2 l2
|
||||
----
|
||||
voters=(1 3) learners=(2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2 learner
|
||||
3: StateProbe match=0 next=3
|
||||
|
||||
# Can promote a voter.
|
||||
simple
|
||||
v2
|
||||
----
|
||||
voters=(1 2 3)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
3: StateProbe match=0 next=3
|
64
raft/confchange/testdata/simple_safety.txt
vendored
Normal file
64
raft/confchange/testdata/simple_safety.txt
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
simple
|
||||
l1
|
||||
----
|
||||
removed all voters
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=2
|
||||
|
||||
simple
|
||||
v2 l3
|
||||
----
|
||||
voters=(1 2) learners=(3)
|
||||
1: StateProbe match=0 next=2
|
||||
2: StateProbe match=0 next=3
|
||||
3: StateProbe match=0 next=3 learner
|
||||
|
||||
simple
|
||||
r1 v5
|
||||
----
|
||||
more than voter changed without entering joint config
|
||||
|
||||
simple
|
||||
r1 r2
|
||||
----
|
||||
removed all voters
|
||||
|
||||
simple
|
||||
v3 v4
|
||||
----
|
||||
more than voter changed without entering joint config
|
||||
|
||||
simple
|
||||
l1 v5
|
||||
----
|
||||
more than voter changed without entering joint config
|
||||
|
||||
simple
|
||||
l1 l2
|
||||
----
|
||||
removed all voters
|
||||
|
||||
simple
|
||||
l2 l3 l4 l5
|
||||
----
|
||||
voters=(1) learners=(2 3 4 5)
|
||||
1: StateProbe match=0 next=2
|
||||
2: StateProbe match=0 next=3 learner
|
||||
3: StateProbe match=0 next=3 learner
|
||||
4: StateProbe match=0 next=9 learner
|
||||
5: StateProbe match=0 next=9 learner
|
||||
|
||||
simple
|
||||
r1
|
||||
----
|
||||
removed all voters
|
||||
|
||||
simple
|
||||
r2 r3 r4 r5
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=2
|
23
raft/confchange/testdata/update.txt
vendored
Normal file
23
raft/confchange/testdata/update.txt
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# Nobody cares about ConfChangeUpdateNode, but at least use it once. It is used
|
||||
# by etcd as a convenient way to pass a blob through their conf change machinery
|
||||
# that updates information tracked outside of raft.
|
||||
|
||||
simple
|
||||
v1
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
||||
|
||||
simple
|
||||
v2 u1
|
||||
----
|
||||
voters=(1 2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
||||
|
||||
simple
|
||||
u1 u2 u3 u1 u2 u3
|
||||
----
|
||||
voters=(1 2)
|
||||
1: StateProbe match=0 next=1
|
||||
2: StateProbe match=0 next=2
|
6
raft/confchange/testdata/zero.txt
vendored
Normal file
6
raft/confchange/testdata/zero.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
# NodeID zero is ignored.
|
||||
simple
|
||||
v1 r0 v0 l0
|
||||
----
|
||||
voters=(1)
|
||||
1: StateProbe match=0 next=1
|
71
raft/raft.go
71
raft/raft.go
@ -24,6 +24,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.etcd.io/etcd/raft/confchange"
|
||||
"go.etcd.io/etcd/raft/quorum"
|
||||
pb "go.etcd.io/etcd/raft/raftpb"
|
||||
"go.etcd.io/etcd/raft/tracker"
|
||||
@ -356,15 +357,11 @@ func newRaft(c *Config) *raft {
|
||||
}
|
||||
for _, p := range peers {
|
||||
// Add node to active config.
|
||||
r.prs.InitProgress(p, 0 /* match */, 1 /* next */, false /* isLearner */)
|
||||
r.applyConfChange(pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: p})
|
||||
}
|
||||
for _, p := range learners {
|
||||
// Add learner to active config.
|
||||
r.prs.InitProgress(p, 0 /* match */, 1 /* next */, true /* isLearner */)
|
||||
|
||||
if r.id == p {
|
||||
r.isLearner = true
|
||||
}
|
||||
r.applyConfChange(pb.ConfChange{Type: pb.ConfChangeAddLearnerNode, NodeID: p})
|
||||
}
|
||||
|
||||
if !isHardStateEqual(hs, emptyState) {
|
||||
@ -1401,55 +1398,15 @@ func (r *raft) promotable() bool {
|
||||
}
|
||||
|
||||
func (r *raft) applyConfChange(cc pb.ConfChange) pb.ConfState {
|
||||
addNodeOrLearnerNode := func(id uint64, isLearner bool) {
|
||||
// NB: this method is intentionally hidden from view. All mutations of
|
||||
// the conf state must call applyConfChange directly.
|
||||
pr := r.prs.Progress[id]
|
||||
if pr == nil {
|
||||
r.prs.InitProgress(id, 0, r.raftLog.lastIndex()+1, isLearner)
|
||||
} else {
|
||||
if isLearner && !pr.IsLearner {
|
||||
// Can only change Learner to Voter.
|
||||
//
|
||||
// TODO(tbg): why?
|
||||
r.logger.Infof("%x ignored addLearner: do not support changing %x from raft peer to learner.", r.id, id)
|
||||
return
|
||||
}
|
||||
|
||||
if isLearner == pr.IsLearner {
|
||||
// Ignore any redundant addNode calls (which can happen because the
|
||||
// initial bootstrapping entries are applied twice).
|
||||
return
|
||||
}
|
||||
|
||||
// Change Learner to Voter, use origin Learner progress.
|
||||
r.prs.RemoveAny(id)
|
||||
r.prs.InitProgress(id, 0 /* match */, 1 /* next */, false /* isLearner */)
|
||||
pr.IsLearner = false
|
||||
*r.prs.Progress[id] = *pr
|
||||
}
|
||||
|
||||
// When a node is first added, we should mark it as recently active.
|
||||
// Otherwise, CheckQuorum may cause us to step down if it is invoked
|
||||
// before the added node has had a chance to communicate with us.
|
||||
r.prs.Progress[id].RecentActive = true
|
||||
}
|
||||
|
||||
var removed int
|
||||
if cc.NodeID != None {
|
||||
switch cc.Type {
|
||||
case pb.ConfChangeAddNode:
|
||||
addNodeOrLearnerNode(cc.NodeID, false /* isLearner */)
|
||||
case pb.ConfChangeAddLearnerNode:
|
||||
addNodeOrLearnerNode(cc.NodeID, true /* isLearner */)
|
||||
case pb.ConfChangeRemoveNode:
|
||||
removed++
|
||||
r.prs.RemoveAny(cc.NodeID)
|
||||
case pb.ConfChangeUpdateNode:
|
||||
default:
|
||||
panic("unexpected conf type")
|
||||
}
|
||||
cfg, prs, err := confchange.Changer{
|
||||
Tracker: r.prs,
|
||||
LastIndex: r.raftLog.lastIndex(),
|
||||
}.Simple(cc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
r.prs.Config = cfg
|
||||
r.prs.Progress = prs
|
||||
|
||||
r.logger.Infof("%x switched to configuration %s", r.id, r.prs.Config)
|
||||
// Now that the configuration is updated, handle any side effects.
|
||||
@ -1479,12 +1436,10 @@ func (r *raft) applyConfChange(cc pb.ConfChange) pb.ConfState {
|
||||
if r.state != StateLeader || len(cs.Nodes) == 0 {
|
||||
return cs
|
||||
}
|
||||
if removed > 0 {
|
||||
if r.maybeCommit() {
|
||||
// The quorum size may have been reduced (but not to zero), so see if
|
||||
// any pending entries can be committed.
|
||||
if r.maybeCommit() {
|
||||
r.bcastAppend()
|
||||
}
|
||||
r.bcastAppend()
|
||||
}
|
||||
// If the the leadTransferee was removed, abort the leadership transfer.
|
||||
if _, tOK := r.prs.Progress[r.leadTransferee]; !tOK && r.leadTransferee != 0 {
|
||||
|
@ -1140,9 +1140,13 @@ func TestCommit(t *testing.T) {
|
||||
storage.hardState = pb.HardState{Term: tt.smTerm}
|
||||
|
||||
sm := newTestRaft(1, []uint64{1}, 10, 2, storage)
|
||||
sm.prs.RemoveAny(1)
|
||||
for j := 0; j < len(tt.matches); j++ {
|
||||
sm.prs.InitProgress(uint64(j)+1, tt.matches[j], tt.matches[j]+1, false)
|
||||
id := uint64(j) + 1
|
||||
if id > 1 {
|
||||
sm.applyConfChange(pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: id})
|
||||
}
|
||||
pr := sm.prs.Progress[id]
|
||||
pr.Match, pr.Next = tt.matches[j], tt.matches[j]+1
|
||||
}
|
||||
sm.maybeCommit()
|
||||
if g := sm.raftLog.committed; g != tt.w {
|
||||
@ -1927,7 +1931,7 @@ func TestNonPromotableVoterWithCheckQuorum(t *testing.T) {
|
||||
nt := newNetwork(a, b)
|
||||
setRandomizedElectionTimeout(b, b.electionTimeout+1)
|
||||
// Need to remove 2 again to make it a non-promotable node since newNetwork overwritten some internal states
|
||||
b.prs.RemoveAny(2)
|
||||
b.applyConfChange(pb.ConfChange{Type: pb.ConfChangeRemoveNode, NodeID: 2})
|
||||
|
||||
if b.promotable() {
|
||||
t.Fatalf("promotable = %v, want false", b.promotable())
|
||||
@ -3093,14 +3097,42 @@ func TestAddNode(t *testing.T) {
|
||||
// TestAddLearner tests that addLearner could update nodes correctly.
|
||||
func TestAddLearner(t *testing.T) {
|
||||
r := newTestRaft(1, []uint64{1}, 10, 1, NewMemoryStorage())
|
||||
// Add new learner peer.
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddLearnerNode})
|
||||
if r.isLearner {
|
||||
t.Fatal("expected 1 to be voter")
|
||||
}
|
||||
nodes := r.prs.LearnerNodes()
|
||||
wnodes := []uint64{2}
|
||||
if !reflect.DeepEqual(nodes, wnodes) {
|
||||
t.Errorf("nodes = %v, want %v", nodes, wnodes)
|
||||
}
|
||||
if !r.prs.Progress[2].IsLearner {
|
||||
t.Errorf("node 2 is learner %t, want %t", r.prs.Progress[2].IsLearner, true)
|
||||
t.Fatal("expected 2 to be learner")
|
||||
}
|
||||
|
||||
// Promote peer to voter.
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode})
|
||||
if r.prs.Progress[2].IsLearner {
|
||||
t.Fatal("expected 2 to be voter")
|
||||
}
|
||||
|
||||
// Demote r.
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeAddLearnerNode})
|
||||
if !r.prs.Progress[1].IsLearner {
|
||||
t.Fatal("expected 1 to be learner")
|
||||
}
|
||||
if !r.isLearner {
|
||||
t.Fatal("expected 1 to be learner")
|
||||
}
|
||||
|
||||
// Promote r again.
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeAddNode})
|
||||
if r.prs.Progress[1].IsLearner {
|
||||
t.Fatal("expected 1 to be voter")
|
||||
}
|
||||
if r.isLearner {
|
||||
t.Fatal("expected 1 to be voter")
|
||||
}
|
||||
}
|
||||
|
||||
@ -3148,12 +3180,13 @@ func TestRemoveNode(t *testing.T) {
|
||||
t.Errorf("nodes = %v, want %v", g, w)
|
||||
}
|
||||
|
||||
// remove all nodes from cluster
|
||||
// Removing the remaining voter will panic.
|
||||
defer func() {
|
||||
if r := recover(); r == nil {
|
||||
t.Error("did not panic")
|
||||
}
|
||||
}()
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeRemoveNode})
|
||||
w = []uint64{}
|
||||
if g := r.prs.VoterNodes(); !reflect.DeepEqual(g, w) {
|
||||
t.Errorf("nodes = %v, want %v", g, w)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRemoveLearner tests that removeNode could update nodes and
|
||||
@ -3171,12 +3204,15 @@ func TestRemoveLearner(t *testing.T) {
|
||||
t.Errorf("nodes = %v, want %v", g, w)
|
||||
}
|
||||
|
||||
// remove all nodes from cluster
|
||||
// Removing the remaining voter will panic.
|
||||
defer func() {
|
||||
if r := recover(); r == nil {
|
||||
t.Error("did not panic")
|
||||
}
|
||||
}()
|
||||
r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeRemoveNode})
|
||||
if g := r.prs.VoterNodes(); !reflect.DeepEqual(g, w) {
|
||||
t.Errorf("nodes = %v, want %v", g, w)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromotable(t *testing.T) {
|
||||
id := uint64(1)
|
||||
tests := []struct {
|
||||
@ -4124,12 +4160,16 @@ func newNetworkWithConfig(configFunc func(*Config), peers ...stateMachine) *netw
|
||||
sm := newRaft(cfg)
|
||||
npeers[id] = sm
|
||||
case *raft:
|
||||
// TODO(tbg): this is all pretty confused. Clean this up.
|
||||
learners := make(map[uint64]bool, len(v.prs.Learners))
|
||||
for i := range v.prs.Learners {
|
||||
learners[i] = true
|
||||
}
|
||||
v.id = id
|
||||
v.prs = tracker.MakeProgressTracker(v.prs.MaxInflight)
|
||||
if len(learners) > 0 {
|
||||
v.prs.Learners = map[uint64]struct{}{}
|
||||
}
|
||||
for i := 0; i < size; i++ {
|
||||
pr := &tracker.Progress{}
|
||||
if _, ok := learners[peerAddrs[i]]; ok {
|
||||
|
@ -16,6 +16,7 @@ package tracker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -235,3 +236,22 @@ func (pr *Progress) String() string {
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// ProgressMap is a map of *Progress.
|
||||
type ProgressMap map[uint64]*Progress
|
||||
|
||||
// String prints the ProgressMap in sorted key order, one Progress per line.
|
||||
func (m ProgressMap) String() string {
|
||||
ids := make([]uint64, 0, len(m))
|
||||
for k := range m {
|
||||
ids = append(ids, k)
|
||||
}
|
||||
sort.Slice(ids, func(i, j int) bool {
|
||||
return ids[i] < ids[j]
|
||||
})
|
||||
var buf strings.Builder
|
||||
for _, id := range ids {
|
||||
fmt.Fprintf(&buf, "%d: %s\n", id, m[id])
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ package tracker
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"go.etcd.io/etcd/raft/quorum"
|
||||
)
|
||||
@ -33,12 +34,11 @@ type Config struct {
|
||||
// simplifies the implementation since it allows peers to have clarity about
|
||||
// its current role without taking into account joint consensus.
|
||||
Learners map[uint64]struct{}
|
||||
// TODO(tbg): when we actually carry out joint consensus changes and turn a
|
||||
// voter into a learner, we cannot add the learner when entering the joint
|
||||
// state. This is because this would violate the invariant that the inter-
|
||||
// section of voters and learners is empty. For example, assume a Voter is
|
||||
// removed and immediately re-added as a learner (or in other words, it is
|
||||
// demoted).
|
||||
// When we turn a voter into a learner during a joint consensus transition,
|
||||
// we cannot add the learner directly when entering the joint state. This is
|
||||
// because this would violate the invariant that the intersection of
|
||||
// voters and learners is empty. For example, assume a Voter is removed and
|
||||
// immediately re-added as a learner (or in other words, it is demoted):
|
||||
//
|
||||
// Initially, the configuration will be
|
||||
//
|
||||
@ -51,7 +51,7 @@ type Config struct {
|
||||
// learners: {3}
|
||||
//
|
||||
// but this violates the invariant (3 is both voter and learner). Instead,
|
||||
// we have
|
||||
// we get
|
||||
//
|
||||
// voters: {1 2} & {1 2 3}
|
||||
// learners: {}
|
||||
@ -66,20 +66,40 @@ type Config struct {
|
||||
//
|
||||
// Note that next_learners is not used while adding a learner that is not
|
||||
// also a voter in the joint config. In this case, the learner is added
|
||||
// to Learners right away when entering the joint configuration, so that it
|
||||
// is caught up as soon as possible.
|
||||
//
|
||||
// NextLearners map[uint64]struct{}
|
||||
// right away when entering the joint configuration, so that it is caught up
|
||||
// as soon as possible.
|
||||
LearnersNext map[uint64]struct{}
|
||||
}
|
||||
|
||||
func (c *Config) String() string {
|
||||
if len(c.Learners) == 0 {
|
||||
return fmt.Sprintf("voters=%s", c.Voters)
|
||||
func (c Config) String() string {
|
||||
var buf strings.Builder
|
||||
fmt.Fprintf(&buf, "voters=%s", c.Voters)
|
||||
if c.Learners != nil {
|
||||
fmt.Fprintf(&buf, " learners=%s", quorum.MajorityConfig(c.Learners).String())
|
||||
}
|
||||
if c.LearnersNext != nil {
|
||||
fmt.Fprintf(&buf, " learners_next=%s", quorum.MajorityConfig(c.LearnersNext).String())
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// Clone returns a copy of the Config that shares no memory with the original.
|
||||
func (c *Config) Clone() Config {
|
||||
clone := func(m map[uint64]struct{}) map[uint64]struct{} {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
mm := make(map[uint64]struct{}, len(m))
|
||||
for k := range m {
|
||||
mm[k] = struct{}{}
|
||||
}
|
||||
return mm
|
||||
}
|
||||
return Config{
|
||||
Voters: quorum.JointConfig{clone(c.Voters[0]), clone(c.Voters[1])},
|
||||
Learners: clone(c.Learners),
|
||||
LearnersNext: clone(c.LearnersNext),
|
||||
}
|
||||
return fmt.Sprintf(
|
||||
"voters=%s learners=%s",
|
||||
c.Voters, quorum.MajorityConfig(c.Learners).String(),
|
||||
)
|
||||
}
|
||||
|
||||
// ProgressTracker tracks the currently active configuration and the information
|
||||
@ -88,7 +108,7 @@ func (c *Config) String() string {
|
||||
type ProgressTracker struct {
|
||||
Config
|
||||
|
||||
Progress map[uint64]*Progress
|
||||
Progress ProgressMap
|
||||
|
||||
Votes map[uint64]bool
|
||||
|
||||
@ -102,11 +122,10 @@ func MakeProgressTracker(maxInflight int) ProgressTracker {
|
||||
Config: Config{
|
||||
Voters: quorum.JointConfig{
|
||||
quorum.MajorityConfig{},
|
||||
// TODO(tbg): this will be mostly empty, so make it a nil pointer
|
||||
// in the common case.
|
||||
quorum.MajorityConfig{},
|
||||
nil, // only populated when used
|
||||
},
|
||||
Learners: map[uint64]struct{}{},
|
||||
Learners: nil, // only populated when used
|
||||
LearnersNext: nil, // only populated when used
|
||||
},
|
||||
Votes: map[uint64]bool{},
|
||||
Progress: map[uint64]*Progress{},
|
||||
@ -139,44 +158,6 @@ func (p *ProgressTracker) Committed() uint64 {
|
||||
return uint64(p.Voters.CommittedIndex(matchAckIndexer(p.Progress)))
|
||||
}
|
||||
|
||||
// RemoveAny removes this peer, which *must* be tracked as a voter or learner,
|
||||
// from the tracker.
|
||||
func (p *ProgressTracker) RemoveAny(id uint64) {
|
||||
_, okPR := p.Progress[id]
|
||||
_, okV1 := p.Voters[0][id]
|
||||
_, okV2 := p.Voters[1][id]
|
||||
_, okL := p.Learners[id]
|
||||
|
||||
okV := okV1 || okV2
|
||||
|
||||
if !okPR {
|
||||
panic("attempting to remove unknown peer %x")
|
||||
} else if !okV && !okL {
|
||||
panic("attempting to remove unknown peer %x")
|
||||
} else if okV && okL {
|
||||
panic(fmt.Sprintf("peer %x is both voter and learner", id))
|
||||
}
|
||||
|
||||
delete(p.Voters[0], id)
|
||||
delete(p.Voters[1], id)
|
||||
delete(p.Learners, id)
|
||||
delete(p.Progress, id)
|
||||
}
|
||||
|
||||
// InitProgress initializes a new progress for the given node or learner. The
|
||||
// node may not exist yet in either form or a panic will ensue.
|
||||
func (p *ProgressTracker) InitProgress(id, match, next uint64, isLearner bool) {
|
||||
if pr := p.Progress[id]; pr != nil {
|
||||
panic(fmt.Sprintf("peer %x already tracked as node %v", id, pr))
|
||||
}
|
||||
if !isLearner {
|
||||
p.Voters[0][id] = struct{}{}
|
||||
} else {
|
||||
p.Learners[id] = struct{}{}
|
||||
}
|
||||
p.Progress[id] = &Progress{Next: next, Match: match, Inflights: NewInflights(p.MaxInflight), IsLearner: isLearner}
|
||||
}
|
||||
|
||||
// Visit invokes the supplied closure for all tracked progresses.
|
||||
func (p *ProgressTracker) Visit(f func(id uint64, pr *Progress)) {
|
||||
for id, pr := range p.Progress {
|
||||
|
Loading…
x
Reference in New Issue
Block a user