etcdserver: add cluster id check for hashKVHandler

backport https://github.com/etcd-io/etcd/pull/15924 to 3.4

Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
This commit is contained in:
Benjamin Wang 2023-11-22 15:09:55 +00:00
parent fe68345104
commit c750e01e37
9 changed files with 283 additions and 14 deletions

View File

@ -51,7 +51,7 @@ var (
RaftSnapshotPrefix = path.Join(RaftPrefix, "snapshot")
errIncompatibleVersion = errors.New("incompatible version")
errClusterIDMismatch = errors.New("cluster ID mismatch")
ErrClusterIDMismatch = errors.New("cluster ID mismatch")
)
type peerGetter interface {
@ -558,7 +558,7 @@ func checkClusterCompatibilityFromHeader(lg *zap.Logger, localID types.ID, heade
} else {
plog.Errorf("request cluster ID mismatch (got %s want %s)", gcid, cid)
}
return errClusterIDMismatch
return ErrClusterIDMismatch
}
return nil
}

View File

@ -677,7 +677,7 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
}
return nil, errIncompatibleVersion
case errClusterIDMismatch.Error():
case ErrClusterIDMismatch.Error():
if cr.lg != nil {
cr.lg.Warn(
"request sent was ignored by remote peer due to cluster ID mismatch",
@ -685,13 +685,13 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
zap.String("remote-peer-cluster-id", resp.Header.Get("X-Etcd-Cluster-ID")),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("local-member-cluster-id", cr.tr.ClusterID.String()),
zap.Error(errClusterIDMismatch),
zap.Error(ErrClusterIDMismatch),
)
} else {
plog.Errorf("request sent was ignored (cluster ID mismatch: peer[%s]=%s, local=%s)",
cr.peerID, resp.Header.Get("X-Etcd-Cluster-ID"), cr.tr.ClusterID)
}
return nil, errClusterIDMismatch
return nil, ErrClusterIDMismatch
default:
return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b))

View File

@ -86,10 +86,10 @@ func checkPostResponse(resp *http.Response, body []byte, req *http.Request, to t
case errIncompatibleVersion.Error():
plog.Errorf("request sent was ignored by peer %s (server version incompatible)", to)
return errIncompatibleVersion
case errClusterIDMismatch.Error():
case ErrClusterIDMismatch.Error():
plog.Errorf("request sent was ignored (cluster ID mismatch: remote[%s]=%s, local=%s)",
to, resp.Header.Get("X-Etcd-Cluster-ID"), req.Header.Get("X-Etcd-Cluster-ID"))
return errClusterIDMismatch
return ErrClusterIDMismatch
default:
return fmt.Errorf("unhandled error %q when precondition failed", string(body))
}

View File

@ -45,6 +45,7 @@ var (
ErrGRPCMemberNotLearner = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member").Err()
ErrGRPCLearnerNotReady = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member which is in sync with leader").Err()
ErrGRPCTooManyLearners = status.New(codes.FailedPrecondition, "etcdserver: too many learner members in cluster").Err()
ErrGRPCClusterIdMismatch = status.New(codes.FailedPrecondition, "etcdserver: cluster ID mismatch").Err()
ErrGRPCRequestTooLarge = status.New(codes.InvalidArgument, "etcdserver: request is too large").Err()
ErrGRPCRequestTooManyRequests = status.New(codes.ResourceExhausted, "etcdserver: too many requests").Err()
@ -105,6 +106,7 @@ var (
ErrorDesc(ErrGRPCMemberNotLearner): ErrGRPCMemberNotLearner,
ErrorDesc(ErrGRPCLearnerNotReady): ErrGRPCLearnerNotReady,
ErrorDesc(ErrGRPCTooManyLearners): ErrGRPCTooManyLearners,
ErrorDesc(ErrGRPCClusterIdMismatch): ErrGRPCClusterIdMismatch,
ErrorDesc(ErrGRPCRequestTooLarge): ErrGRPCRequestTooLarge,
ErrorDesc(ErrGRPCRequestTooManyRequests): ErrGRPCRequestTooManyRequests,
@ -186,6 +188,7 @@ var (
ErrInvalidAuthToken = Error(ErrGRPCInvalidAuthToken)
ErrAuthOldRevision = Error(ErrGRPCAuthOldRevision)
ErrInvalidAuthMgmt = Error(ErrGRPCInvalidAuthMgmt)
ErrClusterIdMismatch = Error(ErrGRPCClusterIdMismatch)
ErrNoLeader = Error(ErrGRPCNoLeader)
ErrNotLeader = Error(ErrGRPCNotLeader)

View File

@ -24,6 +24,7 @@ import (
"strings"
"time"
"go.etcd.io/etcd/etcdserver/api/rafthttp"
"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
"go.etcd.io/etcd/mvcc"
@ -126,6 +127,19 @@ func (s *EtcdServer) CheckInitialHashKV() error {
} else {
plog.Warningf("%s cannot check the hash of peer(%q) at revision %d: local node is lagging behind(%q)", s.ID(), p.eps, rev, p.err.Error())
}
case rpctypes.ErrClusterIdMismatch:
if lg != nil {
lg.Warn(
"cluster ID mismatch",
zap.String("local-member-id", s.ID().String()),
zap.Int64("local-member-revision", rev),
zap.Int64("local-member-compact-revision", crev),
zap.Uint32("local-member-hash", h),
zap.String("remote-peer-id", p.id.String()),
zap.Strings("remote-peer-endpoints", p.eps),
zap.Error(err),
)
}
}
}
}
@ -353,7 +367,7 @@ func (s *EtcdServer) getPeerHashKVs(rev int64) []*peerHashKVResp {
ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
var resp *pb.HashKVResponse
resp, lastErr = s.getPeerHashKVHTTP(ctx, ep, rev)
resp, lastErr = s.getPeerHashKVHTTP(ctx, s.cluster.ID(), ep, rev)
cancel()
if lastErr == nil {
resps = append(resps, &peerHashKVResp{peerInfo: p, resp: resp, err: nil})
@ -440,6 +454,10 @@ func (h *hashKVHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
http.Error(w, "bad path", http.StatusBadRequest)
return
}
if gcid := r.Header.Get("X-Etcd-Cluster-ID"); gcid != "" && gcid != h.server.cluster.ID().String() {
http.Error(w, rafthttp.ErrClusterIDMismatch.Error(), http.StatusPreconditionFailed)
return
}
defer r.Body.Close()
b, err := ioutil.ReadAll(r.Body)
@ -478,7 +496,7 @@ func (h *hashKVHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
// getPeerHashKVHTTP fetch hash of kv store at the given rev via http call to the given url
func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int64) (*pb.HashKVResponse, error) {
func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, cid types.ID, url string, rev int64) (*pb.HashKVResponse, error) {
cc := &http.Client{Transport: s.peerRt}
hashReq := &pb.HashKVRequest{Revision: rev}
hashReqBytes, err := json.Marshal(hashReq)
@ -492,6 +510,7 @@ func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int6
}
req = req.WithContext(ctx)
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Etcd-Cluster-ID", cid.String())
req.Cancel = ctx.Done()
resp, err := cc.Do(req)
@ -511,6 +530,10 @@ func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int6
if strings.Contains(string(b), mvcc.ErrFutureRev.Error()) {
return nil, rpctypes.ErrFutureRev
}
} else if resp.StatusCode == http.StatusPreconditionFailed {
if strings.Contains(string(b), rafthttp.ErrClusterIDMismatch.Error()) {
return nil, rpctypes.ErrClusterIdMismatch
}
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unknown error: %s", string(b))

122
etcdserver/corrupt_test.go Normal file
View File

@ -0,0 +1,122 @@
// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
"go.etcd.io/etcd/lease"
"go.etcd.io/etcd/mvcc"
betesting "go.etcd.io/etcd/mvcc/backend"
"go.etcd.io/etcd/pkg/types"
)
func TestHashKVHandler(t *testing.T) {
var remoteClusterID = 111195
var localClusterID = 111196
var revision = 1
etcdSrv := &EtcdServer{}
etcdSrv.cluster = newTestCluster(nil)
etcdSrv.cluster.SetID(types.ID(localClusterID), types.ID(localClusterID))
be, _ := betesting.NewDefaultTmpBackend()
defer func() {
assert.NoError(t, be.Close())
}()
etcdSrv.kv = mvcc.New(zap.NewNop(), be, &lease.FakeLessor{}, nil, nil, mvcc.StoreConfig{})
ph := &hashKVHandler{
lg: zap.NewNop(),
server: etcdSrv,
}
srv := httptest.NewServer(ph)
defer srv.Close()
tests := []struct {
name string
remoteClusterID int
wcode int
wKeyWords string
}{
{
name: "HashKV returns 200 if cluster hash matches",
remoteClusterID: localClusterID,
wcode: http.StatusOK,
wKeyWords: "",
},
{
name: "HashKV returns 400 if cluster hash doesn't matche",
remoteClusterID: remoteClusterID,
wcode: http.StatusPreconditionFailed,
wKeyWords: "cluster ID mismatch",
},
}
for i, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hashReq := &pb.HashKVRequest{Revision: int64(revision)}
hashReqBytes, err := json.Marshal(hashReq)
if err != nil {
t.Fatalf("failed to marshal request: %v", err)
}
req, err := http.NewRequest(http.MethodGet, srv.URL+PeerHashKVPath, bytes.NewReader(hashReqBytes))
if err != nil {
t.Fatalf("failed to create request: %v", err)
}
req.Header.Set("X-Etcd-Cluster-ID", strconv.FormatUint(uint64(tt.remoteClusterID), 16))
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("failed to get http response: %v", err)
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
t.Fatalf("unexpected io.ReadAll error: %v", err)
}
if resp.StatusCode != tt.wcode {
t.Fatalf("#%d: code = %d, want %d", i, resp.StatusCode, tt.wcode)
}
if resp.StatusCode != http.StatusOK {
if !strings.Contains(string(body), tt.wKeyWords) {
t.Errorf("#%d: body: %s, want body to contain keywords: %s", i, string(body), tt.wKeyWords)
}
return
}
hashKVResponse := pb.HashKVResponse{}
err = json.Unmarshal(body, &hashKVResponse)
if err != nil {
t.Fatalf("unmarshal response error: %v", err)
}
hashValue, _, _, err := etcdSrv.KV().HashByRev(int64(revision))
if err != nil {
t.Fatalf("etcd server hash failed: %v", err)
}
if hashKVResponse.Hash != hashValue {
t.Fatalf("hash value inconsistent: %d != %d", hashKVResponse.Hash, hashValue)
}
})
}
}

View File

@ -128,6 +128,7 @@ type etcdProcessClusterConfig struct {
noStrictReconfig bool
enableV2 bool
initialCorruptCheck bool
corruptCheckTime time.Duration
authTokenOpts string
MaxConcurrentStreams uint32 // default is math.MaxUint32
@ -141,6 +142,17 @@ type etcdProcessClusterConfig struct {
// newEtcdProcessCluster launches a new cluster from etcd processes, returning
// a new etcdProcessCluster once all nodes are ready to accept client requests.
func newEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) {
epc, err := initEtcdProcessCluster(cfg)
if err != nil {
return nil, err
}
return startEtcdProcessCluster(epc, cfg)
}
// `initEtcdProcessCluster` initializes a new cluster based on the given config.
// It doesn't start the cluster.
func initEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) {
etcdCfgs := cfg.etcdServerProcessConfigs()
epc := &etcdProcessCluster{
cfg: cfg,
@ -158,6 +170,11 @@ func newEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster,
epc.procs[i] = proc
}
return epc, nil
}
// `startEtcdProcessCluster` launches a new cluster from etcd processes.
func startEtcdProcessCluster(epc *etcdProcessCluster, cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) {
if err := epc.Start(); err != nil {
return nil, err
}
@ -262,6 +279,9 @@ func (cfg *etcdProcessClusterConfig) etcdServerProcessConfigs() []*etcdServerPro
if cfg.initialCorruptCheck {
args = append(args, "--experimental-initial-corrupt-check")
}
if cfg.corruptCheckTime != 0 {
args = append(args, "--experimental-corrupt-check-time", cfg.corruptCheckTime.String())
}
var murl string
if cfg.metricsURLScheme != "" {
murl = (&url.URL{

View File

@ -20,11 +20,16 @@ import (
"fmt"
"os"
"path/filepath"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/etcdserver/etcdserverpb"
"go.etcd.io/etcd/mvcc/mvccpb"
"go.etcd.io/etcd/pkg/testutil"
bolt "go.etcd.io/bbolt"
)
@ -127,3 +132,96 @@ func corruptHash(fpath string) error {
return nil
})
}
func TestInPlaceRecovery(t *testing.T) {
defer testutil.AfterTest(t)
basePort := 20000
// Initialize the cluster.
cfgOld := etcdProcessClusterConfig{
clusterSize: 3,
initialToken: "old",
keepDataDir: false,
clientTLS: clientNonTLS,
corruptCheckTime: time.Second,
basePort: basePort,
}
epcOld, err := newEtcdProcessCluster(&cfgOld)
if err != nil {
t.Fatalf("could not start etcd process cluster (%v)", err)
}
t.Cleanup(func() {
if errC := epcOld.Close(); errC != nil {
t.Fatalf("error closing etcd processes (%v)", errC)
}
})
t.Log("Old cluster started.")
//Put some data into the old cluster, so that after recovering from a blank db, the hash diverges.
t.Log("putting 10 keys...")
oldEtcdctl := NewEtcdctl(epcOld.EndpointsV3(), cfgOld.clientTLS, false, false)
for i := 0; i < 10; i++ {
err := oldEtcdctl.Put(fmt.Sprintf("%d", i), fmt.Sprintf("%d", i))
assert.NoError(t, err, "error on put")
}
// Create a new cluster config, but with the same port numbers. In this way the new servers can stay in
// contact with the old ones.
cfgNew := etcdProcessClusterConfig{
clusterSize: 3,
initialToken: "new",
keepDataDir: false,
clientTLS: clientNonTLS,
initialCorruptCheck: true,
corruptCheckTime: time.Second,
basePort: basePort,
}
epcNew, err := initEtcdProcessCluster(&cfgNew)
if err != nil {
t.Fatalf("could not start etcd process cluster (%v)", err)
}
t.Cleanup(func() {
if errC := epcNew.Close(); errC != nil {
t.Fatalf("error closing etcd processes (%v)", errC)
}
})
t.Log("New cluster initialized.")
newEtcdctl := NewEtcdctl(epcNew.EndpointsV3(), cfgNew.clientTLS, false, false)
// Rolling recovery of the servers.
var wg sync.WaitGroup
t.Log("rolling updating servers in place...")
for i, newProc := range epcNew.procs {
oldProc := epcOld.procs[i]
err = oldProc.Close()
if err != nil {
t.Fatalf("could not stop etcd process (%v)", err)
}
t.Logf("old cluster server %d: %s stopped.", i, oldProc.Config().name)
wg.Add(1)
go func(proc etcdProcess) {
defer wg.Done()
perr := proc.Start()
if perr != nil {
t.Fatalf("failed to start etcd process: %v", perr)
return
}
t.Logf("new etcd server %q started in-place with blank db", proc.Config().name)
}(newProc)
t.Log("sleeping 5 sec to let nodes do periodical check...")
time.Sleep(5 * time.Second)
}
wg.Wait()
t.Log("new cluster started.")
alarmResponse, err := newEtcdctl.AlarmList()
assert.NoError(t, err, "error on alarm list")
for _, alarm := range alarmResponse.Alarms {
if alarm.Alarm == etcdserverpb.AlarmType_CORRUPT {
t.Fatalf("there is no corruption after in-place recovery, but corruption reported.")
}
}
t.Log("no corruption detected.")
}

View File

@ -40,7 +40,7 @@ func NewEtcdctl(endpoints []string, connType clientConnType, isAutoTLS bool, v2
func (ctl *Etcdctl) Get(key string) (*clientv3.GetResponse, error) {
var resp clientv3.GetResponse
err := ctl.spawnJsonCmd(&resp, "get", key)
err := ctl.spawnJsonCmd(&resp, "", "get", key)
return &resp, err
}
@ -67,7 +67,7 @@ func (ctl *Etcdctl) AlarmList() (*clientv3.AlarmResponse, error) {
panic("Unsupported method for v2")
}
var resp clientv3.AlarmResponse
err := ctl.spawnJsonCmd(&resp, "alarm", "list")
err := ctl.spawnJsonCmd(&resp, "{", "alarm", "list")
return &resp, err
}
@ -76,7 +76,7 @@ func (ctl *Etcdctl) MemberList() (*clientv3.MemberListResponse, error) {
panic("Unsupported method for v2")
}
var resp clientv3.MemberListResponse
err := ctl.spawnJsonCmd(&resp, "member", "list")
err := ctl.spawnJsonCmd(&resp, "", "member", "list")
return &resp, err
}
@ -88,13 +88,16 @@ func (ctl *Etcdctl) Compact(rev int64) (*clientv3.CompactResponse, error) {
return nil, spawnWithExpect(args, fmt.Sprintf("compacted revision %v", rev))
}
func (ctl *Etcdctl) spawnJsonCmd(output interface{}, args ...string) error {
func (ctl *Etcdctl) spawnJsonCmd(output interface{}, expectedOutput string, args ...string) error {
args = append(args, "-w", "json")
cmd, err := spawnCmd(append(ctl.cmdArgs(), args...))
if err != nil {
return err
}
line, err := cmd.Expect("header")
if expectedOutput == "" {
expectedOutput = "header"
}
line, err := cmd.Expect(expectedOutput)
if err != nil {
return err
}