mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00

The current membership-changing functionality of etcd has a problem that can cause a deadlock. How to reproduce: 1. construct an N-node cluster 2. add N new nodes with etcdctl member add, without starting the new members. What happens: After the N nodes have been added, the total size of the cluster becomes 2 * N and the quorum of the cluster becomes N + 1. This means a membership change requires at least N + 1 running nodes, because Raft treats membership information in its log like any other ordinary log append request. Assume the peer URLs of the added nodes are wrong because of an operator mistake or a bug in a wrapper program that launches etcd. In such a case, both adding and removing members are impossible because the quorum cannot be reached. Of course, ordinary requests cannot be served either. The cluster would appear to be deadlocked. Of course, the best practice for adding new nodes is to add one node and start it, one node at a time. However, the effect of this problem is so serious that I think forcibly preventing it would be valuable. Solution: This patch lets etcd forbid adding a new node if the operation changes the quorum and the new quorum is larger than the number of running nodes. If etcd is launched with the newly added option -strict-reconfig-check, the checking logic is activated. If the option isn't passed, the default reconfiguration behavior is kept. Fixes https://github.com/coreos/etcd/issues/3477
330 lines
12 KiB
Go
330 lines
12 KiB
Go
// Copyright 2015 CoreOS, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package etcdmain
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"net/url"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"github.com/coreos/etcd/etcdserver"
|
|
"github.com/coreos/etcd/pkg/cors"
|
|
"github.com/coreos/etcd/pkg/flags"
|
|
"github.com/coreos/etcd/pkg/transport"
|
|
"github.com/coreos/etcd/version"
|
|
)
|
|
|
|
// Values accepted by the -proxy flag.
const (
	proxyFlagOff      = "off"
	proxyFlagReadonly = "readonly"
	proxyFlagOn       = "on"
)

// Values accepted by the -discovery-fallback flag.
const (
	fallbackFlagExit  = "exit"
	fallbackFlagProxy = "proxy"
)

// Values accepted by the -initial-cluster-state flag.
const (
	clusterStateFlagNew      = "new"
	clusterStateFlagExisting = "existing"
)

// defaultName is the default value of the -name flag and the member name
// used to build the default -initial-cluster value.
const defaultName = "default"

// maxElectionMs is the upper bound, in milliseconds, accepted for the
// election timeout.
// More details are listed in ../Documentation/tuning.md#time-parameters.
const maxElectionMs = 50000
|
|
|
|
// ignored lists the flag names that NewConfig registers as
// flags.IgnoredFlag values with an empty usage string.
// NOTE(review): presumably these are legacy options kept so old command
// lines still parse - confirm against flags.IgnoredFlag.
var ignored = []string{
	"cluster-active-size",
	"cluster-remove-delay",
	"cluster-sync-interval",
	"config",
	"force",
	"max-result-buffer",
	"max-retry-attempts",
	"peer-heartbeat-interval",
	"peer-election-timeout",
	"retry-interval",
	"snapshot",
	"v",
	"vv",
}

// ErrConflictBootstrapFlags is returned by Parse when more than one of the
// initial-cluster, discovery and discovery-srv flags has been set.
var ErrConflictBootstrapFlags = fmt.Errorf(`multiple discovery or bootstrap flags are set. Choose one of "initial-cluster", "discovery" or "discovery-srv"`)

// errUnsetAdvertiseClientURLsFlag is returned by Parse when
// -listen-client-urls is set but -advertise-client-urls is not, and the
// process cannot end up running as a proxy.
var errUnsetAdvertiseClientURLsFlag = fmt.Errorf("-advertise-client-urls is required when -listen-client-urls is set explicitly")
|
|
|
|
type config struct {
|
|
*flag.FlagSet
|
|
|
|
// member
|
|
corsInfo *cors.CORSInfo
|
|
dir string
|
|
walDir string
|
|
lpurls, lcurls []url.URL
|
|
maxSnapFiles uint
|
|
maxWalFiles uint
|
|
name string
|
|
snapCount uint64
|
|
// TODO: decouple tickMs and heartbeat tick (current heartbeat tick = 1).
|
|
// make ticks a cluster wide configuration.
|
|
TickMs uint
|
|
ElectionMs uint
|
|
|
|
// clustering
|
|
apurls, acurls []url.URL
|
|
clusterState *flags.StringsFlag
|
|
dnsCluster string
|
|
dproxy string
|
|
durl string
|
|
fallback *flags.StringsFlag
|
|
initialCluster string
|
|
initialClusterToken string
|
|
strictReconfigCheck bool
|
|
|
|
// proxy
|
|
proxy *flags.StringsFlag
|
|
proxyFailureWaitMs uint
|
|
proxyRefreshIntervalMs uint
|
|
proxyDialTimeoutMs uint
|
|
proxyWriteTimeoutMs uint
|
|
proxyReadTimeoutMs uint
|
|
|
|
// security
|
|
clientTLSInfo, peerTLSInfo transport.TLSInfo
|
|
|
|
// logging
|
|
debug bool
|
|
logPkgLevels string
|
|
|
|
// unsafe
|
|
forceNewCluster bool
|
|
|
|
printVersion bool
|
|
|
|
v3demo bool
|
|
|
|
ignored []string
|
|
}
|
|
|
|
func NewConfig() *config {
|
|
cfg := &config{
|
|
corsInfo: &cors.CORSInfo{},
|
|
clusterState: flags.NewStringsFlag(
|
|
clusterStateFlagNew,
|
|
clusterStateFlagExisting,
|
|
),
|
|
fallback: flags.NewStringsFlag(
|
|
fallbackFlagExit,
|
|
fallbackFlagProxy,
|
|
),
|
|
ignored: ignored,
|
|
proxy: flags.NewStringsFlag(
|
|
proxyFlagOff,
|
|
proxyFlagReadonly,
|
|
proxyFlagOn,
|
|
),
|
|
}
|
|
|
|
cfg.FlagSet = flag.NewFlagSet("etcd", flag.ContinueOnError)
|
|
fs := cfg.FlagSet
|
|
fs.Usage = func() {
|
|
fmt.Println(usageline)
|
|
}
|
|
|
|
// member
|
|
fs.Var(cfg.corsInfo, "cors", "Comma-separated white list of origins for CORS (cross-origin resource sharing).")
|
|
fs.StringVar(&cfg.dir, "data-dir", "", "Path to the data directory")
|
|
fs.StringVar(&cfg.walDir, "wal-dir", "", "Path to the dedicated wal directory")
|
|
fs.Var(flags.NewURLsValue("http://localhost:2380,http://localhost:7001"), "listen-peer-urls", "List of URLs to listen on for peer traffic")
|
|
fs.Var(flags.NewURLsValue("http://localhost:2379,http://localhost:4001"), "listen-client-urls", "List of URLs to listen on for client traffic")
|
|
fs.UintVar(&cfg.maxSnapFiles, "max-snapshots", defaultMaxSnapshots, "Maximum number of snapshot files to retain (0 is unlimited)")
|
|
fs.UintVar(&cfg.maxWalFiles, "max-wals", defaultMaxWALs, "Maximum number of wal files to retain (0 is unlimited)")
|
|
fs.StringVar(&cfg.name, "name", defaultName, "Unique human-readable name for this node")
|
|
fs.Uint64Var(&cfg.snapCount, "snapshot-count", etcdserver.DefaultSnapCount, "Number of committed transactions to trigger a snapshot")
|
|
fs.UintVar(&cfg.TickMs, "heartbeat-interval", 100, "Time (in milliseconds) of a heartbeat interval.")
|
|
fs.UintVar(&cfg.ElectionMs, "election-timeout", 1000, "Time (in milliseconds) for an election to timeout.")
|
|
|
|
// clustering
|
|
fs.Var(flags.NewURLsValue("http://localhost:2380,http://localhost:7001"), "initial-advertise-peer-urls", "List of this member's peer URLs to advertise to the rest of the cluster")
|
|
fs.Var(flags.NewURLsValue("http://localhost:2379,http://localhost:4001"), "advertise-client-urls", "List of this member's client URLs to advertise to the rest of the cluster")
|
|
fs.StringVar(&cfg.durl, "discovery", "", "Discovery service used to bootstrap the initial cluster")
|
|
fs.Var(cfg.fallback, "discovery-fallback", fmt.Sprintf("Valid values include %s", strings.Join(cfg.fallback.Values, ", ")))
|
|
if err := cfg.fallback.Set(fallbackFlagProxy); err != nil {
|
|
// Should never happen.
|
|
plog.Panicf("unexpected error setting up discovery-fallback flag: %v", err)
|
|
}
|
|
fs.StringVar(&cfg.dproxy, "discovery-proxy", "", "HTTP proxy to use for traffic to discovery service")
|
|
fs.StringVar(&cfg.dnsCluster, "discovery-srv", "", "DNS domain used to bootstrap initial cluster")
|
|
fs.StringVar(&cfg.initialCluster, "initial-cluster", initialClusterFromName(defaultName), "Initial cluster configuration for bootstrapping")
|
|
fs.StringVar(&cfg.initialClusterToken, "initial-cluster-token", "etcd-cluster", "Initial cluster token for the etcd cluster during bootstrap")
|
|
fs.Var(cfg.clusterState, "initial-cluster-state", "Initial cluster configuration for bootstrapping")
|
|
if err := cfg.clusterState.Set(clusterStateFlagNew); err != nil {
|
|
// Should never happen.
|
|
plog.Panicf("unexpected error setting up clusterStateFlag: %v", err)
|
|
}
|
|
fs.BoolVar(&cfg.strictReconfigCheck, "strict-reconfig-check", false, "Reject reconfiguration that might cause quorum loss")
|
|
|
|
// proxy
|
|
fs.Var(cfg.proxy, "proxy", fmt.Sprintf("Valid values include %s", strings.Join(cfg.proxy.Values, ", ")))
|
|
if err := cfg.proxy.Set(proxyFlagOff); err != nil {
|
|
// Should never happen.
|
|
plog.Panicf("unexpected error setting up proxyFlag: %v", err)
|
|
}
|
|
fs.UintVar(&cfg.proxyFailureWaitMs, "proxy-failure-wait", 5000, "Time (in milliseconds) an endpoint will be held in a failed state.")
|
|
fs.UintVar(&cfg.proxyRefreshIntervalMs, "proxy-refresh-interval", 30000, "Time (in milliseconds) of the endpoints refresh interval.")
|
|
fs.UintVar(&cfg.proxyDialTimeoutMs, "proxy-dial-timeout", 1000, "Time (in milliseconds) for a dial to timeout.")
|
|
fs.UintVar(&cfg.proxyWriteTimeoutMs, "proxy-write-timeout", 5000, "Time (in milliseconds) for a write to timeout.")
|
|
fs.UintVar(&cfg.proxyReadTimeoutMs, "proxy-read-timeout", 0, "Time (in milliseconds) for a read to timeout.")
|
|
|
|
// security
|
|
fs.StringVar(&cfg.clientTLSInfo.CAFile, "ca-file", "", "DEPRECATED: Path to the client server TLS CA file.")
|
|
fs.StringVar(&cfg.clientTLSInfo.CertFile, "cert-file", "", "Path to the client server TLS cert file.")
|
|
fs.StringVar(&cfg.clientTLSInfo.KeyFile, "key-file", "", "Path to the client server TLS key file.")
|
|
fs.BoolVar(&cfg.clientTLSInfo.ClientCertAuth, "client-cert-auth", false, "Enable client cert authentication.")
|
|
fs.StringVar(&cfg.clientTLSInfo.TrustedCAFile, "trusted-ca-file", "", "Path to the client server TLS trusted CA key file.")
|
|
fs.StringVar(&cfg.peerTLSInfo.CAFile, "peer-ca-file", "", "DEPRECATED: Path to the peer server TLS CA file.")
|
|
fs.StringVar(&cfg.peerTLSInfo.CertFile, "peer-cert-file", "", "Path to the peer server TLS cert file.")
|
|
fs.StringVar(&cfg.peerTLSInfo.KeyFile, "peer-key-file", "", "Path to the peer server TLS key file.")
|
|
fs.BoolVar(&cfg.peerTLSInfo.ClientCertAuth, "peer-client-cert-auth", false, "Enable peer client cert authentication.")
|
|
fs.StringVar(&cfg.peerTLSInfo.TrustedCAFile, "peer-trusted-ca-file", "", "Path to the peer server TLS trusted CA file.")
|
|
|
|
// logging
|
|
fs.BoolVar(&cfg.debug, "debug", false, "Enable debug output to the logs.")
|
|
fs.StringVar(&cfg.logPkgLevels, "log-package-levels", "", "Specify a particular log level for each etcd package.")
|
|
|
|
// unsafe
|
|
fs.BoolVar(&cfg.forceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster")
|
|
|
|
// version
|
|
fs.BoolVar(&cfg.printVersion, "version", false, "Print the version and exit")
|
|
|
|
// demo flag
|
|
fs.BoolVar(&cfg.v3demo, "experimental-v3demo", false, "Enable experimental v3 demo API")
|
|
|
|
// backwards-compatibility with v0.4.6
|
|
fs.Var(&flags.IPAddressPort{}, "addr", "DEPRECATED: Use -advertise-client-urls instead.")
|
|
fs.Var(&flags.IPAddressPort{}, "bind-addr", "DEPRECATED: Use -listen-client-urls instead.")
|
|
fs.Var(&flags.IPAddressPort{}, "peer-addr", "DEPRECATED: Use -initial-advertise-peer-urls instead.")
|
|
fs.Var(&flags.IPAddressPort{}, "peer-bind-addr", "DEPRECATED: Use -listen-peer-urls instead.")
|
|
fs.Var(&flags.DeprecatedFlag{Name: "peers"}, "peers", "DEPRECATED: Use -initial-cluster instead")
|
|
fs.Var(&flags.DeprecatedFlag{Name: "peers-file"}, "peers-file", "DEPRECATED: Use -initial-cluster instead")
|
|
|
|
// ignored
|
|
for _, f := range cfg.ignored {
|
|
fs.Var(&flags.IgnoredFlag{Name: f}, f, "")
|
|
}
|
|
return cfg
|
|
}
|
|
|
|
func (cfg *config) Parse(arguments []string) error {
|
|
perr := cfg.FlagSet.Parse(arguments)
|
|
switch perr {
|
|
case nil:
|
|
case flag.ErrHelp:
|
|
fmt.Println(flagsline)
|
|
os.Exit(0)
|
|
default:
|
|
os.Exit(2)
|
|
}
|
|
if len(cfg.FlagSet.Args()) != 0 {
|
|
return fmt.Errorf("'%s' is not a valid flag", cfg.FlagSet.Arg(0))
|
|
}
|
|
|
|
if cfg.printVersion {
|
|
fmt.Printf("etcd Version: %s\n", version.Version)
|
|
fmt.Printf("Git SHA: %s\n", version.GitSHA)
|
|
fmt.Printf("Go Version: %s\n", runtime.Version())
|
|
fmt.Printf("Go OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)
|
|
os.Exit(0)
|
|
}
|
|
|
|
err := flags.SetFlagsFromEnv(cfg.FlagSet)
|
|
if err != nil {
|
|
plog.Fatalf("%v", err)
|
|
}
|
|
|
|
set := make(map[string]bool)
|
|
cfg.FlagSet.Visit(func(f *flag.Flag) {
|
|
set[f.Name] = true
|
|
})
|
|
nSet := 0
|
|
for _, v := range []bool{set["discovery"], set["initial-cluster"], set["discovery-srv"]} {
|
|
if v {
|
|
nSet += 1
|
|
}
|
|
}
|
|
if nSet > 1 {
|
|
return ErrConflictBootstrapFlags
|
|
}
|
|
|
|
flags.SetBindAddrFromAddr(cfg.FlagSet, "peer-bind-addr", "peer-addr")
|
|
flags.SetBindAddrFromAddr(cfg.FlagSet, "bind-addr", "addr")
|
|
|
|
cfg.lpurls, err = flags.URLsFromFlags(cfg.FlagSet, "listen-peer-urls", "peer-bind-addr", cfg.peerTLSInfo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cfg.apurls, err = flags.URLsFromFlags(cfg.FlagSet, "initial-advertise-peer-urls", "peer-addr", cfg.peerTLSInfo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cfg.lcurls, err = flags.URLsFromFlags(cfg.FlagSet, "listen-client-urls", "bind-addr", cfg.clientTLSInfo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cfg.acurls, err = flags.URLsFromFlags(cfg.FlagSet, "advertise-client-urls", "addr", cfg.clientTLSInfo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// when etcd runs in member mode user needs to set -advertise-client-urls if -listen-client-urls is set.
|
|
// TODO(yichengq): check this for joining through discovery service case
|
|
mayFallbackToProxy := flags.IsSet(cfg.FlagSet, "discovery") && cfg.fallback.String() == fallbackFlagProxy
|
|
mayBeProxy := cfg.proxy.String() != proxyFlagOff || mayFallbackToProxy
|
|
if !mayBeProxy {
|
|
if flags.IsSet(cfg.FlagSet, "listen-client-urls") && !flags.IsSet(cfg.FlagSet, "advertise-client-urls") {
|
|
return errUnsetAdvertiseClientURLsFlag
|
|
}
|
|
}
|
|
|
|
if 5*cfg.TickMs > cfg.ElectionMs {
|
|
return fmt.Errorf("-election-timeout[%vms] should be at least as 5 times as -heartbeat-interval[%vms]", cfg.ElectionMs, cfg.TickMs)
|
|
}
|
|
if cfg.ElectionMs > maxElectionMs {
|
|
return fmt.Errorf("-election-timeout[%vms] is too long, and should be set less than %vms", cfg.ElectionMs, maxElectionMs)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func initialClusterFromName(name string) string {
|
|
n := name
|
|
if name == "" {
|
|
n = defaultName
|
|
}
|
|
return fmt.Sprintf("%s=http://localhost:2380,%s=http://localhost:7001", n, n)
|
|
}
|
|
|
|
// isNewCluster reports whether the string form of the
// -initial-cluster-state flag equals clusterStateFlagNew.
func (cfg config) isNewCluster() bool {
	state := cfg.clusterState.String()
	return state == clusterStateFlagNew
}
|
|
// isProxy reports whether the string form of the -proxy flag is anything
// other than proxyFlagOff.
func (cfg config) isProxy() bool {
	mode := cfg.proxy.String()
	return mode != proxyFlagOff
}
|
|
// isReadonlyProxy reports whether the string form of the -proxy flag equals
// proxyFlagReadonly.
func (cfg config) isReadonlyProxy() bool {
	mode := cfg.proxy.String()
	return mode == proxyFlagReadonly
}
|
|
// shouldFallbackToProxy reports whether the string form of the
// -discovery-fallback flag equals fallbackFlagProxy.
func (cfg config) shouldFallbackToProxy() bool {
	choice := cfg.fallback.String()
	return choice == fallbackFlagProxy
}
|
|
|
|
// electionTicks returns the election timeout expressed in heartbeat ticks:
// ElectionMs divided by TickMs, truncated toward zero. TickMs must be
// non-zero; the integer division panics otherwise.
func (cfg config) electionTicks() int {
	ticks := cfg.ElectionMs / cfg.TickMs
	return int(ticks)
}
|