mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #14272 from serathius/memberID
memberID equals zero in corruption alarm
This commit is contained in:
commit
37e4193c60
@ -16,6 +16,7 @@ Previous change logs can be found at [CHANGELOG-3.4](https://github.com/etcd-io/
|
|||||||
- Fix [lessor may continue to schedule checkpoint after stepping down leader role](https://github.com/etcd-io/etcd/pull/14087).
|
- Fix [lessor may continue to schedule checkpoint after stepping down leader role](https://github.com/etcd-io/etcd/pull/14087).
|
||||||
- Fix [Restrict the max size of each WAL entry to the remaining size of the WAL file](https://github.com/etcd-io/etcd/pull/14127).
|
- Fix [Restrict the max size of each WAL entry to the remaining size of the WAL file](https://github.com/etcd-io/etcd/pull/14127).
|
||||||
- Fix [Protect rangePermCache with a RW lock correctly](https://github.com/etcd-io/etcd/pull/14227)
|
- Fix [Protect rangePermCache with a RW lock correctly](https://github.com/etcd-io/etcd/pull/14227)
|
||||||
|
- Fix [memberID equals zero in corruption alarm](https://github.com/etcd-io/etcd/pull/14272)
|
||||||
-
|
-
|
||||||
### Other
|
### Other
|
||||||
- [Bump golang.org/x/crypto to latest version](https://github.com/etcd-io/etcd/pull/13996) to address [CVE-2022-27191](https://github.com/advisories/GHSA-8c26-wmh5-6g9v).
|
- [Bump golang.org/x/crypto to latest version](https://github.com/etcd-io/etcd/pull/13996) to address [CVE-2022-27191](https://github.com/advisories/GHSA-8c26-wmh5-6g9v).
|
||||||
|
@ -73,6 +73,7 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0).
|
|||||||
- Fix [etcd panic on startup (auth enabled)](https://github.com/etcd-io/etcd/pull/13942)
|
- Fix [etcd panic on startup (auth enabled)](https://github.com/etcd-io/etcd/pull/13942)
|
||||||
- Fix [Defrag unsets backend options](https://github.com/etcd-io/etcd/pull/13679).
|
- Fix [Defrag unsets backend options](https://github.com/etcd-io/etcd/pull/13679).
|
||||||
- Fix [Restrict the max size of each WAL entry to the remaining size of the WAL file](https://github.com/etcd-io/etcd/pull/14122).
|
- Fix [Restrict the max size of each WAL entry to the remaining size of the WAL file](https://github.com/etcd-io/etcd/pull/14122).
|
||||||
|
- Fix [memberID equals zero in corruption alarm](https://github.com/etcd-io/etcd/pull/14272)
|
||||||
|
|
||||||
### tools/benchmark
|
### tools/benchmark
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ type Hasher interface {
|
|||||||
MemberId() types.ID
|
MemberId() types.ID
|
||||||
PeerHashByRev(int64) []*peerHashKVResp
|
PeerHashByRev(int64) []*peerHashKVResp
|
||||||
LinearizableReadNotify(context.Context) error
|
LinearizableReadNotify(context.Context) error
|
||||||
TriggerCorruptAlarm(uint64)
|
TriggerCorruptAlarm(types.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
func newCorruptionChecker(lg *zap.Logger, s *EtcdServer, storage mvcc.HashStorage) *corruptionChecker {
|
func newCorruptionChecker(lg *zap.Logger, s *EtcdServer, storage mvcc.HashStorage) *corruptionChecker {
|
||||||
@ -78,7 +78,7 @@ func (h hasherAdapter) PeerHashByRev(rev int64) []*peerHashKVResp {
|
|||||||
return h.EtcdServer.getPeerHashKVs(rev)
|
return h.EtcdServer.getPeerHashKVs(rev)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h hasherAdapter) TriggerCorruptAlarm(memberID uint64) {
|
func (h hasherAdapter) TriggerCorruptAlarm(memberID types.ID) {
|
||||||
h.EtcdServer.triggerCorruptAlarm(memberID)
|
h.EtcdServer.triggerCorruptAlarm(memberID)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,7 +184,7 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
alarmed := false
|
alarmed := false
|
||||||
mismatch := func(id uint64) {
|
mismatch := func(id types.ID) {
|
||||||
if alarmed {
|
if alarmed {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -202,7 +202,7 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
zap.Int64("compact-revision-2", h2.CompactRevision),
|
zap.Int64("compact-revision-2", h2.CompactRevision),
|
||||||
zap.Uint32("hash-2", h2.Hash),
|
zap.Uint32("hash-2", h2.Hash),
|
||||||
)
|
)
|
||||||
mismatch(uint64(cm.hasher.MemberId()))
|
mismatch(cm.hasher.MemberId())
|
||||||
}
|
}
|
||||||
|
|
||||||
checkedCount := 0
|
checkedCount := 0
|
||||||
@ -211,7 +211,6 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
checkedCount++
|
checkedCount++
|
||||||
id := p.resp.Header.MemberId
|
|
||||||
|
|
||||||
// leader expects follower's latest revision less than or equal to leader's
|
// leader expects follower's latest revision less than or equal to leader's
|
||||||
if p.resp.Header.Revision > rev2 {
|
if p.resp.Header.Revision > rev2 {
|
||||||
@ -219,9 +218,9 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
"revision from follower must be less than or equal to leader's",
|
"revision from follower must be less than or equal to leader's",
|
||||||
zap.Int64("leader-revision", rev2),
|
zap.Int64("leader-revision", rev2),
|
||||||
zap.Int64("follower-revision", p.resp.Header.Revision),
|
zap.Int64("follower-revision", p.resp.Header.Revision),
|
||||||
zap.String("follower-peer-id", types.ID(id).String()),
|
zap.String("follower-peer-id", p.id.String()),
|
||||||
)
|
)
|
||||||
mismatch(id)
|
mismatch(p.id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// leader expects follower's latest compact revision less than or equal to leader's
|
// leader expects follower's latest compact revision less than or equal to leader's
|
||||||
@ -230,9 +229,9 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
"compact revision from follower must be less than or equal to leader's",
|
"compact revision from follower must be less than or equal to leader's",
|
||||||
zap.Int64("leader-compact-revision", h2.CompactRevision),
|
zap.Int64("leader-compact-revision", h2.CompactRevision),
|
||||||
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
|
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
|
||||||
zap.String("follower-peer-id", types.ID(id).String()),
|
zap.String("follower-peer-id", p.id.String()),
|
||||||
)
|
)
|
||||||
mismatch(id)
|
mismatch(p.id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// follower's compact revision is leader's old one, then hashes must match
|
// follower's compact revision is leader's old one, then hashes must match
|
||||||
@ -243,9 +242,9 @@ func (cm *corruptionChecker) PeriodicCheck() error {
|
|||||||
zap.Uint32("leader-hash", h.Hash),
|
zap.Uint32("leader-hash", h.Hash),
|
||||||
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
|
zap.Int64("follower-compact-revision", p.resp.CompactRevision),
|
||||||
zap.Uint32("follower-hash", p.resp.Hash),
|
zap.Uint32("follower-hash", p.resp.Hash),
|
||||||
zap.String("follower-peer-id", types.ID(id).String()),
|
zap.String("follower-peer-id", p.id.String()),
|
||||||
)
|
)
|
||||||
mismatch(id)
|
mismatch(p.id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cm.lg.Info("finished peer corruption check", zap.Int("number-of-peers-checked", checkedCount))
|
cm.lg.Info("finished peer corruption check", zap.Int("number-of-peers-checked", checkedCount))
|
||||||
@ -272,7 +271,7 @@ func (cm *corruptionChecker) CompactHashCheck() {
|
|||||||
|
|
||||||
// follower's compact revision is leader's old one, then hashes must match
|
// follower's compact revision is leader's old one, then hashes must match
|
||||||
if p.resp.Hash != hash.Hash {
|
if p.resp.Hash != hash.Hash {
|
||||||
cm.hasher.TriggerCorruptAlarm(uint64(p.id))
|
cm.hasher.TriggerCorruptAlarm(p.id)
|
||||||
cm.lg.Error("failed compaction hash check",
|
cm.lg.Error("failed compaction hash check",
|
||||||
zap.Int64("revision", hash.Revision),
|
zap.Int64("revision", hash.Revision),
|
||||||
zap.Int64("leader-compact-revision", hash.CompactRevision),
|
zap.Int64("leader-compact-revision", hash.CompactRevision),
|
||||||
@ -330,9 +329,9 @@ func (cm *corruptionChecker) uncheckedRevisions() []mvcc.KeyValueHash {
|
|||||||
return hashes
|
return hashes
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) triggerCorruptAlarm(id uint64) {
|
func (s *EtcdServer) triggerCorruptAlarm(id types.ID) {
|
||||||
a := &pb.AlarmRequest{
|
a := &pb.AlarmRequest{
|
||||||
MemberID: id,
|
MemberID: uint64(id),
|
||||||
Action: pb.AlarmRequest_ACTIVATE,
|
Action: pb.AlarmRequest_ACTIVATE,
|
||||||
Alarm: pb.AlarmType_CORRUPT,
|
Alarm: pb.AlarmType_CORRUPT,
|
||||||
}
|
}
|
||||||
|
@ -161,7 +161,7 @@ func TestPeriodicCheck(t *testing.T) {
|
|||||||
{
|
{
|
||||||
name: "Peer with newer revision",
|
name: "Peer with newer revision",
|
||||||
hasher: fakeHasher{
|
hasher: fakeHasher{
|
||||||
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1, MemberId: 42}}}},
|
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}}}},
|
||||||
},
|
},
|
||||||
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(42)"},
|
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(42)"},
|
||||||
expectCorrupt: true,
|
expectCorrupt: true,
|
||||||
@ -169,7 +169,7 @@ func TestPeriodicCheck(t *testing.T) {
|
|||||||
{
|
{
|
||||||
name: "Peer with newer compact revision",
|
name: "Peer with newer compact revision",
|
||||||
hasher: fakeHasher{
|
hasher: fakeHasher{
|
||||||
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 88}, CompactRevision: 2}}},
|
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}}},
|
||||||
},
|
},
|
||||||
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
|
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
|
||||||
expectCorrupt: true,
|
expectCorrupt: true,
|
||||||
@ -186,7 +186,7 @@ func TestPeriodicCheck(t *testing.T) {
|
|||||||
name: "Peer with different hash and same compact revision as first local",
|
name: "Peer with different hash and same compact revision as first local",
|
||||||
hasher: fakeHasher{
|
hasher: fakeHasher{
|
||||||
hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 2}, revision: 2}},
|
hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 2}, revision: 2}},
|
||||||
peerHashes: []*peerHashKVResp{{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1, MemberId: 666}, CompactRevision: 1, Hash: 2}}},
|
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 666}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}, CompactRevision: 1, Hash: 2}}},
|
||||||
},
|
},
|
||||||
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(666)"},
|
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(666)"},
|
||||||
expectCorrupt: true,
|
expectCorrupt: true,
|
||||||
@ -195,8 +195,8 @@ func TestPeriodicCheck(t *testing.T) {
|
|||||||
name: "Multiple corrupted peers trigger one alarm",
|
name: "Multiple corrupted peers trigger one alarm",
|
||||||
hasher: fakeHasher{
|
hasher: fakeHasher{
|
||||||
peerHashes: []*peerHashKVResp{
|
peerHashes: []*peerHashKVResp{
|
||||||
{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 88}, CompactRevision: 2}},
|
{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
|
||||||
{resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10, MemberId: 89}, CompactRevision: 2}},
|
{peerInfo: peerInfo{id: 89}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
|
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
|
||||||
@ -371,7 +371,7 @@ func (f *fakeHasher) LinearizableReadNotify(ctx context.Context) error {
|
|||||||
return f.linearizableReadNotify
|
return f.linearizableReadNotify
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *fakeHasher) TriggerCorruptAlarm(memberId uint64) {
|
func (f *fakeHasher) TriggerCorruptAlarm(memberId types.ID) {
|
||||||
f.actions = append(f.actions, fmt.Sprintf("TriggerCorruptAlarm(%d)", memberId))
|
f.actions = append(f.actions, fmt.Sprintf("TriggerCorruptAlarm(%d)", memberId))
|
||||||
f.alarmTriggered = true
|
f.alarmTriggered = true
|
||||||
}
|
}
|
||||||
|
@ -121,6 +121,15 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
|
|||||||
assert.NoError(t, err, "error on put")
|
assert.NoError(t, err, "error on put")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
members, err := cc.MemberList()
|
||||||
|
assert.NoError(t, err, "error on member list")
|
||||||
|
var memberID uint64
|
||||||
|
for _, m := range members.Members {
|
||||||
|
if m.Name == epc.Procs[0].Config().Name {
|
||||||
|
memberID = m.ID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.NotZero(t, memberID, "member not found")
|
||||||
epc.Procs[0].Stop()
|
epc.Procs[0].Stop()
|
||||||
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.Procs[0].Config().DataDirPath))
|
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.Procs[0].Config().DataDirPath))
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
@ -130,8 +139,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
|
|||||||
time.Sleep(checkTime * 11 / 10)
|
time.Sleep(checkTime * 11 / 10)
|
||||||
alarmResponse, err := cc.AlarmList()
|
alarmResponse, err := cc.AlarmList()
|
||||||
assert.NoError(t, err, "error on alarm list")
|
assert.NoError(t, err, "error on alarm list")
|
||||||
// TODO: Investigate why MemberID is 0?
|
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
|
||||||
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCompactHashCheckDetectCorruption(t *testing.T) {
|
func TestCompactHashCheckDetectCorruption(t *testing.T) {
|
||||||
|
@ -97,8 +97,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
|
|||||||
time.Sleep(50 * time.Millisecond)
|
time.Sleep(50 * time.Millisecond)
|
||||||
alarmResponse, err := cc.AlarmList(ctx)
|
alarmResponse, err := cc.AlarmList(ctx)
|
||||||
assert.NoError(t, err, "error on alarm list")
|
assert.NoError(t, err, "error on alarm list")
|
||||||
// TODO: Investigate why MemberID is 0?
|
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
|
||||||
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCompactHashCheck(t *testing.T) {
|
func TestCompactHashCheck(t *testing.T) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user