Skip to content

Commit

Permalink
etcdserver: intentionally set the memberID as 0 in corruption alarm
Browse files Browse the repository at this point in the history
Signed-off-by: Benjamin Wang <wachao@vmware.com>
  • Loading branch information
ahrtr committed Nov 25, 2022
1 parent cecbe35 commit 47ff26f
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 21 deletions.
6 changes: 4 additions & 2 deletions scripts/test_lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ function relativePath {
local commonPart=$source
local result=""

while [[ "${target#$commonPart}" == "${target}" ]]; do
# Refer to https://www.shellcheck.net/wiki/SC2295
while [[ "${target#"$commonPart"}" == "${target}" ]]; do
# no match, means that candidate common part is not correct
# go up one level (reduce common part)
commonPart="$(dirname "$commonPart")"
Expand All @@ -90,7 +91,8 @@ function relativePath {

# since we now have identified the common part,
# compute the non-common part
local forwardPart="${target#$commonPart}"
# Refer to https://www.shellcheck.net/wiki/SC2295
local forwardPart="${target"#$commonPart"}"

# and now stick all parts together
if [[ -n $result ]] && [[ -n $forwardPart ]]; then
Expand Down
12 changes: 10 additions & 2 deletions server/etcdserver/corrupt.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,11 @@ func (cm *corruptionChecker) PeriodicCheck() error {
return
}
alarmed = true
cm.hasher.TriggerCorruptAlarm(id)
// It isn't clear which member's data is corrupted, so we
// intentionally set the memberID as 0. We will identify
// the corrupted members using quorum in 3.6. Please see
// discussion in https://github.com/etcd-io/etcd/pull/14828.
cm.hasher.TriggerCorruptAlarm(types.ID(0))
}

if h2.Hash != h.Hash && rev2 == rev && h.CompactRevision == h2.CompactRevision {
Expand Down Expand Up @@ -276,7 +280,11 @@ func (cm *corruptionChecker) CompactHashCheck() {

// follower's compact revision is leader's old one, then hashes must match
if p.resp.Hash != hash.Hash {
cm.hasher.TriggerCorruptAlarm(p.id)
// It isn't clear which member's data is corrupted, so we
// intentionally set the memberID as 0. We will identify
// the corrupted members using quorum in 3.6. Please see
// discussion in https://github.com/etcd-io/etcd/pull/14828.
cm.hasher.TriggerCorruptAlarm(types.ID(0))
cm.lg.Error("failed compaction hash check",
zap.Int64("revision", hash.Revision),
zap.Int64("leader-compact-revision", hash.CompactRevision),
Expand Down
12 changes: 6 additions & 6 deletions server/etcdserver/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func TestPeriodicCheck(t *testing.T) {
{
name: "Different local hash and same revisions",
hasher: fakeHasher{hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 1}, revision: 1}}},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "MemberId()", "TriggerCorruptAlarm(1)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "MemberId()", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -163,15 +163,15 @@ func TestPeriodicCheck(t *testing.T) {
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(42)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
name: "Peer with newer compact revision",
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -188,7 +188,7 @@ func TestPeriodicCheck(t *testing.T) {
hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 2}, revision: 2}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 666}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}, CompactRevision: 1, Hash: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(666)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -199,7 +199,7 @@ func TestPeriodicCheck(t *testing.T) {
{peerInfo: peerInfo{id: 89}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
}
Expand Down Expand Up @@ -267,7 +267,7 @@ func TestCompactHashCheck(t *testing.T) {
hashes: []mvcc.KeyValueHash{{Revision: 1, CompactRevision: 1, Hash: 1}, {Revision: 2, CompactRevision: 1, Hash: 2}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 3}}},
},
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "TriggerCorruptAlarm(42)"},
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand Down
12 changes: 3 additions & 9 deletions tests/e2e/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(checkTime * 11 / 10)
alarmResponse, err := cc.AlarmList()
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}

func TestCompactHashCheckDetectCorruption(t *testing.T) {
Expand All @@ -164,14 +164,8 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
err := cc.Put(testutil.PickKey(int64(i)), fmt.Sprint(i))
assert.NoError(t, err, "error on put")
}
members, err := cc.MemberList()
_, err = cc.MemberList()
assert.NoError(t, err, "error on member list")
var memberID uint64
for _, m := range members.Members {
if m.Name == epc.procs[0].Config().name {
memberID = m.ID
}
}

epc.procs[0].Stop()
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.procs[0].Config().dataDirPath))
Expand All @@ -184,5 +178,5 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
time.Sleep(checkTime * 11 / 10)
alarmResponse, err := cc.AlarmList()
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}
4 changes: 2 additions & 2 deletions tests/integration/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(50 * time.Millisecond)
alarmResponse, err := cc.AlarmList(ctx)
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}

func TestCompactHashCheck(t *testing.T) {
Expand Down Expand Up @@ -169,5 +169,5 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
time.Sleep(50 * time.Millisecond)
alarmResponse, err := cc.AlarmList(ctx)
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}

0 comments on commit 47ff26f

Please sign in to comment.