From ec214030d07d8b510dacdd03517331583ee33b89 Mon Sep 17 00:00:00 2001 From: Yicheng Qin Date: Thu, 23 Jul 2015 14:48:11 -0700 Subject: [PATCH] etcdmain: reject unreasonably high values of -election-timeout This helps users to detect setting problem early. --- Documentation/configuration.md | 2 +- Documentation/tuning.md | 2 ++ etcdmain/config.go | 7 +++++++ etcdmain/help.go | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/configuration.md b/Documentation/configuration.md index ada1bbcbe..d785079eb 100644 --- a/Documentation/configuration.md +++ b/Documentation/configuration.md @@ -28,7 +28,7 @@ To start etcd automatically using custom settings at startup in Linux, using a [ + default: "100" ##### -election-timeout -+ Time (in milliseconds) for an election to timeout. ++ Time (in milliseconds) for an election to timeout. See [Documentation/tuning.md](tuning.md#time-parameters) for details. + default: "1000" ##### -listen-peer-urls diff --git a/Documentation/tuning.md b/Documentation/tuning.md index 014d3a70f..4326eb3c0 100644 --- a/Documentation/tuning.md +++ b/Documentation/tuning.md @@ -25,6 +25,8 @@ The election timeout should be set based on the heartbeat interval and your netw Election timeouts should be at least 10 times your ping time so it can account for variance in your network. For example, if the ping time between your nodes is 10ms then you should have at least a 100ms election timeout. +The upper limit of the election timeout is 50000ms, which should only be used when deploying a globally-distributed etcd cluster. First, 5s is the upper limit of the average global round-trip time. A reasonable round-trip time for the continental United States is 130ms, and the time between the US and Japan is around 350-400ms. Because packets may be delayed considerably and network conditions may be terrible, 5s is a safe value for it. Then, because the election timeout should be an order of magnitude bigger than the broadcast time, 50s becomes its maximum. 
+ You should also set your election timeout to at least 5 to 10 times your heartbeat interval to account for variance in leader replication. For a heartbeat interval of 50ms you should set your election timeout to at least 250ms - 500ms. diff --git a/etcdmain/config.go b/etcdmain/config.go index aacc45033..aa3e331e8 100644 --- a/etcdmain/config.go +++ b/etcdmain/config.go @@ -41,6 +41,10 @@ const ( clusterStateFlagExisting = "existing" defaultName = "default" + + // maxElectionMs specifies the maximum value of election timeout. + // More details are listed in ../Documentation/tuning.md#time-parameters. + maxElectionMs = 50000 ) var ( @@ -293,6 +297,9 @@ func (cfg *config) Parse(arguments []string) error { if 5*cfg.TickMs > cfg.ElectionMs { return fmt.Errorf("-election-timeout[%vms] should be at least as 5 times as -heartbeat-interval[%vms]", cfg.ElectionMs, cfg.TickMs) } + if cfg.ElectionMs > maxElectionMs { + return fmt.Errorf("-election-timeout[%vms] is too long, and should be set less than %vms", cfg.ElectionMs, maxElectionMs) + } return nil } diff --git a/etcdmain/help.go b/etcdmain/help.go index b8a3095d8..404160509 100644 --- a/etcdmain/help.go +++ b/etcdmain/help.go @@ -36,7 +36,7 @@ member flags: --heartbeat-interval '100' time (in milliseconds) of a heartbeat interval. --election-timeout '1000' - time (in milliseconds) for an election to timeout. + time (in milliseconds) for an election to timeout. See tuning documentation for details. --listen-peer-urls 'http://localhost:2380,http://localhost:7001' list of URLs to listen on for peer traffic. --listen-client-urls 'http://localhost:2379,http://localhost:4001'