mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00

The root cause of the flakes was that the server was considered ready too early. In particular, the following sequence: ``` ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:44.474+0100","caller":"rafthttp/stream.go:274","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"ed5f620d34a8e61b","remote-peer-id":"ca50e9357181d758"} ../../bin/etcd-2456648: {"level":"warn","ts":"2021-01-11T09:56:49.040+0100","caller":"etcdserver/server.go:1942","msg":"failed to publish local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","publish-timeout":"7s","error":"etcdserver: request timed out, possibly due to connection lost"} ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:49.049+0100","caller":"etcdserver/server.go:1921","msg":"published local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","cluster-id":"34f27e83b3bc2ff","publish-timeout":"7s"} ``` was taking 5s. If this happened concurrently with an etcdctl call, etcdctl could time out. The fix requires servers to report 'ready to serve client requests' before they are considered up. Also fixed some whitelisted 'goroutines'.
146 lines
3.5 KiB
Go
146 lines
3.5 KiB
Go
// Copyright 2017 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package e2e
|
|
|
|
import (
|
|
"fmt"
|
|
"net/url"
|
|
"os"
|
|
|
|
"go.etcd.io/etcd/pkg/v3/expect"
|
|
"go.etcd.io/etcd/pkg/v3/fileutil"
|
|
)
|
|
|
|
var (
	// etcdServerReadyLines holds the log fragments that waitReady hands to
	// waitReadyExpectProc when waiting for a freshly started server.
	etcdServerReadyLines = []string{
		"enabled capabilities for version",
		"published",
		"ready to serve client requests",
	}

	// binPath and ctlBinPath are not assigned in this file.
	// NOTE(review): presumably the paths of the etcd and etcdctl binaries
	// under test — confirm against the code that sets them.
	binPath    string
	ctlBinPath string
)
|
|
|
|
// etcdProcess is a process that serves etcd requests.
|
|
type etcdProcess interface {
|
|
EndpointsV2() []string
|
|
EndpointsV3() []string
|
|
EndpointsMetrics() []string
|
|
|
|
Start() error
|
|
Restart() error
|
|
Stop() error
|
|
Close() error
|
|
WithStopSignal(sig os.Signal) os.Signal
|
|
Config() *etcdServerProcessConfig
|
|
}
|
|
|
|
type etcdServerProcess struct {
|
|
cfg *etcdServerProcessConfig
|
|
proc *expect.ExpectProcess
|
|
donec chan struct{} // closed when Interact() terminates
|
|
}
|
|
|
|
// etcdServerProcessConfig describes how one etcd server process is launched
// and which files it owns.
type etcdServerProcessConfig struct {
	// execPath is the binary that gets checked for existence and spawned.
	execPath string
	// args are appended after execPath on the spawned command line.
	args []string
	// tlsArgs is not read in this file.
	// NOTE(review): presumably TLS-related flags for client tools — confirm.
	tlsArgs []string

	// dataDirPath is wiped on creation (unless keepDataDir) and on Close.
	dataDirPath string
	// keepDataDir skips the wipe of dataDirPath in newEtcdServerProcess.
	keepDataDir bool

	// name is not read in this file; presumably the member name — confirm.
	name string

	// purl is consulted on Stop: for the "unix"/"unixs" schemes the file at
	// Host+Path is removed. Presumably the peer URL — confirm.
	purl url.URL

	// acurl is the endpoint reported by EndpointsV2 and EndpointsV3.
	acurl string
	// murl is the endpoint reported by EndpointsMetrics.
	murl string

	// initialToken and initialCluster are not read in this file.
	// NOTE(review): presumably bootstrap cluster settings — confirm.
	initialToken   string
	initialCluster string
}
|
|
|
|
func newEtcdServerProcess(cfg *etcdServerProcessConfig) (*etcdServerProcess, error) {
|
|
if !fileutil.Exist(cfg.execPath) {
|
|
return nil, fmt.Errorf("could not find etcd binary: %s", cfg.execPath)
|
|
}
|
|
if !cfg.keepDataDir {
|
|
if err := os.RemoveAll(cfg.dataDirPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return &etcdServerProcess{cfg: cfg, donec: make(chan struct{})}, nil
|
|
}
|
|
|
|
// EndpointsV2 returns a single-element slice holding the configured acurl.
func (ep *etcdServerProcess) EndpointsV2() []string {
	endpoints := []string{ep.cfg.acurl}
	return endpoints
}
|
|
// EndpointsV3 returns the same endpoints as EndpointsV2.
func (ep *etcdServerProcess) EndpointsV3() []string {
	return ep.EndpointsV2()
}
|
|
// EndpointsMetrics returns a single-element slice holding the configured murl.
func (ep *etcdServerProcess) EndpointsMetrics() []string {
	endpoints := []string{ep.cfg.murl}
	return endpoints
}
|
|
|
|
func (ep *etcdServerProcess) Start() error {
|
|
if ep.proc != nil {
|
|
panic("already started")
|
|
}
|
|
proc, err := spawnCmd(append([]string{ep.cfg.execPath}, ep.cfg.args...))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ep.proc = proc
|
|
return ep.waitReady()
|
|
}
|
|
|
|
func (ep *etcdServerProcess) Restart() error {
|
|
if err := ep.Stop(); err != nil {
|
|
return err
|
|
}
|
|
ep.donec = make(chan struct{})
|
|
return ep.Start()
|
|
}
|
|
|
|
func (ep *etcdServerProcess) Stop() (err error) {
|
|
if ep == nil || ep.proc == nil {
|
|
return nil
|
|
}
|
|
err = ep.proc.Stop()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ep.proc = nil
|
|
<-ep.donec
|
|
ep.donec = make(chan struct{})
|
|
if ep.cfg.purl.Scheme == "unix" || ep.cfg.purl.Scheme == "unixs" {
|
|
err = os.Remove(ep.cfg.purl.Host + ep.cfg.purl.Path)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ep *etcdServerProcess) Close() error {
|
|
if err := ep.Stop(); err != nil {
|
|
return err
|
|
}
|
|
return os.RemoveAll(ep.cfg.dataDirPath)
|
|
}
|
|
|
|
func (ep *etcdServerProcess) WithStopSignal(sig os.Signal) os.Signal {
|
|
ret := ep.proc.StopSignal
|
|
ep.proc.StopSignal = sig
|
|
return ret
|
|
}
|
|
|
|
func (ep *etcdServerProcess) waitReady() error {
|
|
defer close(ep.donec)
|
|
return waitReadyExpectProc(ep.proc, etcdServerReadyLines)
|
|
}
|
|
|
|
// Config returns the configuration pointer this process was created with;
// it is shared with the receiver, not copied.
func (ep *etcdServerProcess) Config() *etcdServerProcessConfig {
	return ep.cfg
}
|