mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: resolve review comments in PR 14828
Signed-off-by: Benjamin Wang <wachao@vmware.com>
This commit is contained in:
parent
7b19ee6396
commit
85fc09d09b
@ -279,7 +279,7 @@ func (cm *corruptionChecker) CompactHashCheck() {
|
|||||||
// false: skipped some members, so need to check next hash
|
// false: skipped some members, so need to check next hash
|
||||||
func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers []*peerHashKVResp) bool {
|
func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers []*peerHashKVResp) bool {
|
||||||
leaderId := cm.hasher.MemberId()
|
leaderId := cm.hasher.MemberId()
|
||||||
hashMap := map[uint32]types.IDSlice{leaderHash.Hash: {leaderId}}
|
hash2members := map[uint32]types.IDSlice{leaderHash.Hash: {leaderId}}
|
||||||
|
|
||||||
peersChecked := 0
|
peersChecked := 0
|
||||||
// group all peers by hash
|
// group all peers by hash
|
||||||
@ -304,16 +304,16 @@ func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers
|
|||||||
}
|
}
|
||||||
|
|
||||||
peersChecked++
|
peersChecked++
|
||||||
if ids, ok := hashMap[peer.resp.Hash]; !ok {
|
if ids, ok := hash2members[peer.resp.Hash]; !ok {
|
||||||
hashMap[peer.resp.Hash] = []types.ID{peer.id}
|
hash2members[peer.resp.Hash] = []types.ID{peer.id}
|
||||||
} else {
|
} else {
|
||||||
ids = append(ids, peer.id)
|
ids = append(ids, peer.id)
|
||||||
hashMap[peer.resp.Hash] = ids
|
hash2members[peer.resp.Hash] = ids
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// All members have the same CompactRevision and Hash.
|
// All members have the same CompactRevision and Hash.
|
||||||
if len(hashMap) == 1 {
|
if len(hash2members) == 1 {
|
||||||
if peersChecked == len(peers) {
|
if peersChecked == len(peers) {
|
||||||
cm.lg.Info("successfully checked hash on whole cluster",
|
cm.lg.Info("successfully checked hash on whole cluster",
|
||||||
zap.Int("number-of-peers-checked", peersChecked),
|
zap.Int("number-of-peers-checked", peersChecked),
|
||||||
@ -342,32 +342,25 @@ func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers
|
|||||||
memberCnt := len(peers) + 1
|
memberCnt := len(peers) + 1
|
||||||
quorum := memberCnt/2 + 1
|
quorum := memberCnt/2 + 1
|
||||||
quorumExist := false
|
quorumExist := false
|
||||||
for k, v := range hashMap {
|
for k, v := range hash2members {
|
||||||
if len(v) >= quorum {
|
if len(v) >= quorum {
|
||||||
quorumExist = true
|
quorumExist = true
|
||||||
// remove the majority, and we might raise alarms for the left members.
|
// remove the majority, and we might raise alarms for the left members.
|
||||||
delete(hashMap, k)
|
delete(hash2members, k)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !quorumExist {
|
if !quorumExist {
|
||||||
// If quorumExist doesn't exist, then only raise alarm for the least minority
|
// If quorumExist doesn't exist, then only raise alarm for the least minority.
|
||||||
cm.lg.Error("Detected compaction hash mismatch but can't identify the corrupted members, so only raise alarm for the least minority",
|
// The first step is to find out the members which are the least minority.
|
||||||
zap.String("leader-id", leaderId.String()),
|
|
||||||
zap.Int64("leader-revision", leaderHash.Revision),
|
|
||||||
zap.Int64("leader-compact-revision", leaderHash.CompactRevision),
|
|
||||||
zap.Uint32("leader-hash", leaderHash.Hash),
|
|
||||||
)
|
|
||||||
|
|
||||||
// find out the members which are the least minority
|
|
||||||
var (
|
var (
|
||||||
minCnt int = math.MaxInt
|
minCnt int = math.MaxInt
|
||||||
hashVal uint32
|
hashVal uint32
|
||||||
memberIDs types.IDSlice
|
memberIDs types.IDSlice
|
||||||
)
|
)
|
||||||
|
|
||||||
for k, v := range hashMap {
|
for k, v := range hash2members {
|
||||||
if v.Len() < minCnt {
|
if v.Len() < minCnt {
|
||||||
minCnt = v.Len()
|
minCnt = v.Len()
|
||||||
hashVal = k
|
hashVal = k
|
||||||
@ -375,10 +368,11 @@ func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// raise alarms
|
||||||
for _, pid := range memberIDs {
|
for _, pid := range memberIDs {
|
||||||
cm.hasher.TriggerCorruptAlarm(pid)
|
cm.hasher.TriggerCorruptAlarm(pid)
|
||||||
}
|
}
|
||||||
delete(hashMap, hashVal)
|
delete(hash2members, hashVal)
|
||||||
|
|
||||||
cm.lg.Error("Detected compaction hash mismatch but can't identify the corrupted members, so only raise alarm for the least minority",
|
cm.lg.Error("Detected compaction hash mismatch but can't identify the corrupted members, so only raise alarm for the least minority",
|
||||||
zap.String("leader-id", leaderId.String()),
|
zap.String("leader-id", leaderId.String()),
|
||||||
@ -392,12 +386,13 @@ func (cm *corruptionChecker) checkPeerHashes(leaderHash mvcc.KeyValueHash, peers
|
|||||||
|
|
||||||
// Raise alarm for the left members if the quorum is present.
|
// Raise alarm for the left members if the quorum is present.
|
||||||
// But we should always generate error log for debugging.
|
// But we should always generate error log for debugging.
|
||||||
for k, v := range hashMap {
|
for k, v := range hash2members {
|
||||||
for _, pid := range v {
|
if quorumExist {
|
||||||
if quorumExist {
|
for _, pid := range v {
|
||||||
cm.hasher.TriggerCorruptAlarm(pid)
|
cm.hasher.TriggerCorruptAlarm(pid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cm.lg.Error("Detected compaction hash mismatch",
|
cm.lg.Error("Detected compaction hash mismatch",
|
||||||
zap.String("leader-id", leaderId.String()),
|
zap.String("leader-id", leaderId.String()),
|
||||||
zap.Int64("leader-revision", leaderHash.Revision),
|
zap.Int64("leader-revision", leaderHash.Revision),
|
||||||
|
@ -289,6 +289,24 @@ func TestCompactHashCheck(t *testing.T) {
|
|||||||
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "MemberId()", "TriggerCorruptAlarm(46)"},
|
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "MemberId()", "TriggerCorruptAlarm(46)"},
|
||||||
expectCorrupt: true,
|
expectCorrupt: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Peer returned same compaction revision but all members have different hash",
|
||||||
|
hasher: fakeHasher{
|
||||||
|
hashes: []mvcc.KeyValueHash{{Revision: 1, CompactRevision: 1, Hash: 1}, {Revision: 2, CompactRevision: 1, Hash: 2}},
|
||||||
|
// Each member is the least minority, and etcd may trigger alarm for any one.
|
||||||
|
// So intentionally set the same member id for all members.
|
||||||
|
peerHashes: []*peerHashKVResp{
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 3}},
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 4}},
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 5}},
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 6}},
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 7}},
|
||||||
|
{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 8}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "MemberId()", "TriggerCorruptAlarm(42)"},
|
||||||
|
expectCorrupt: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Peer returned same hash bumps last revision checked",
|
name: "Peer returned same hash bumps last revision checked",
|
||||||
hasher: fakeHasher{
|
hasher: fakeHasher{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user