Fix the data inconsistency issue by adding a txPostLockHook to the backend

Previously, SetConsistentIndex() was called during the apply workflow,
but outside the db transaction. If a commit happens between SetConsistentIndex()
and the apply workflow that follows it, and etcd crashes for whatever reason right
after that commit, then etcd has committed an incomplete transaction to the db.
Eventually etcd runs into a data inconsistency issue.

In this commit, we move the SetConsistentIndex() call into a txPostLockHook, so
that it is executed while the transaction lock is held.
This commit is contained in:
ahrtr
2022-04-08 14:39:23 +08:00
parent 3ace622792
commit 66c7aab4d3
21 changed files with 216 additions and 83 deletions

View File

@@ -223,7 +223,7 @@ func (as *authStore) AuthEnable() error {
}
b := as.be
tx := b.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer func() {
tx.Unlock()
b.ForceCommit()
@@ -259,7 +259,7 @@ func (as *authStore) AuthDisable() {
}
b := as.be
tx := b.BatchTx()
tx.Lock()
tx.LockInsideApply()
tx.UnsafePut(buckets.Auth, enableFlagKey, authDisabled)
as.commitRevision(tx)
tx.Unlock()
@@ -287,7 +287,7 @@ func (as *authStore) Authenticate(ctx context.Context, username, password string
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, username)
@@ -324,7 +324,7 @@ func (as *authStore) CheckPassword(username, password string) (uint64, error) {
// CompareHashAndPassword is very expensive, so we use closures
// to avoid putting it in the critical section of the tx lock.
revision, err := func() (uint64, error) {
tx := as.be.BatchTx()
tx := as.be.ReadTx()
tx.Lock()
defer tx.Unlock()
@@ -353,7 +353,7 @@ func (as *authStore) CheckPassword(username, password string) (uint64, error) {
func (as *authStore) Recover(be backend.Backend) {
enabled := false
as.be = be
tx := be.BatchTx()
tx := be.ReadTx()
tx.Lock()
_, vs := tx.UnsafeRange(buckets.Auth, enableFlagKey, nil, 0)
if len(vs) == 1 {
@@ -385,7 +385,7 @@ func (as *authStore) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse,
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, r.Name)
@@ -431,7 +431,7 @@ func (as *authStore) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDelete
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, r.Name)
@@ -456,7 +456,7 @@ func (as *authStore) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDelete
func (as *authStore) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, r.Name)
@@ -498,7 +498,7 @@ func (as *authStore) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*p
func (as *authStore) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, r.User)
@@ -544,7 +544,7 @@ func (as *authStore) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUser
func (as *authStore) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
user := getUser(as.lg, tx, r.Name)
tx.Unlock()
@@ -559,7 +559,7 @@ func (as *authStore) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse,
func (as *authStore) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
users := getAllUsers(as.lg, tx)
tx.Unlock()
@@ -581,7 +581,7 @@ func (as *authStore) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUs
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
user := getUser(as.lg, tx, r.Name)
@@ -623,7 +623,7 @@ func (as *authStore) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUs
func (as *authStore) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
var resp pb.AuthRoleGetResponse
@@ -638,7 +638,7 @@ func (as *authStore) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse,
func (as *authStore) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
roles := getAllRoles(as.lg, tx)
tx.Unlock()
@@ -651,7 +651,7 @@ func (as *authStore) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListRespon
func (as *authStore) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
role := getRole(as.lg, tx, r.Role)
@@ -697,7 +697,7 @@ func (as *authStore) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDelete
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
role := getRole(as.lg, tx, r.Role)
@@ -742,7 +742,7 @@ func (as *authStore) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse,
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
role := getRole(as.lg, tx, r.Name)
@@ -786,7 +786,7 @@ func (as *authStore) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (
}
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
defer tx.Unlock()
role := getRole(as.lg, tx, r.Name)
@@ -849,7 +849,7 @@ func (as *authStore) isOpPermitted(userName string, revision uint64, key, rangeE
return ErrAuthOldRevision
}
tx := as.be.BatchTx()
tx := as.be.ReadTx()
tx.Lock()
defer tx.Unlock()
@@ -891,7 +891,7 @@ func (as *authStore) IsAdminPermitted(authInfo *AuthInfo) error {
return ErrUserEmpty
}
tx := as.be.BatchTx()
tx := as.be.ReadTx()
tx.Lock()
u := getUser(as.lg, tx, authInfo.Username)
tx.Unlock()
@@ -907,7 +907,7 @@ func (as *authStore) IsAdminPermitted(authInfo *AuthInfo) error {
return nil
}
func getUser(lg *zap.Logger, tx backend.BatchTx, username string) *authpb.User {
func getUser(lg *zap.Logger, tx backend.ReadTx, username string) *authpb.User {
_, vs := tx.UnsafeRange(buckets.AuthUsers, []byte(username), nil, 0)
if len(vs) == 0 {
return nil
@@ -925,7 +925,7 @@ func getUser(lg *zap.Logger, tx backend.BatchTx, username string) *authpb.User {
return user
}
func getAllUsers(lg *zap.Logger, tx backend.BatchTx) []*authpb.User {
func getAllUsers(lg *zap.Logger, tx backend.ReadTx) []*authpb.User {
_, vs := tx.UnsafeRange(buckets.AuthUsers, []byte{0}, []byte{0xff}, -1)
if len(vs) == 0 {
return nil
@@ -955,7 +955,7 @@ func delUser(tx backend.BatchTx, username string) {
tx.UnsafeDelete(buckets.AuthUsers, []byte(username))
}
func getRole(lg *zap.Logger, tx backend.BatchTx, rolename string) *authpb.Role {
func getRole(lg *zap.Logger, tx backend.ReadTx, rolename string) *authpb.Role {
_, vs := tx.UnsafeRange(buckets.AuthRoles, []byte(rolename), nil, 0)
if len(vs) == 0 {
return nil
@@ -969,7 +969,7 @@ func getRole(lg *zap.Logger, tx backend.BatchTx, rolename string) *authpb.Role {
return role
}
func getAllRoles(lg *zap.Logger, tx backend.BatchTx) []*authpb.Role {
func getAllRoles(lg *zap.Logger, tx backend.ReadTx) []*authpb.Role {
_, vs := tx.UnsafeRange(buckets.AuthRoles, []byte{0}, []byte{0xff}, -1)
if len(vs) == 0 {
return nil
@@ -1028,7 +1028,7 @@ func NewAuthStore(lg *zap.Logger, be backend.Backend, tp TokenProvider, bcryptCo
}
tx := be.BatchTx()
tx.Lock()
tx.LockOutsideApply()
tx.UnsafeCreateBucket(buckets.Auth)
tx.UnsafeCreateBucket(buckets.AuthUsers)
@@ -1081,7 +1081,7 @@ func (as *authStore) commitRevision(tx backend.BatchTx) {
tx.UnsafePut(buckets.Auth, revisionKey, revBytes)
}
func getRevision(tx backend.BatchTx) uint64 {
func getRevision(tx backend.ReadTx) uint64 {
_, vs := tx.UnsafeRange(buckets.Auth, revisionKey, nil, 0)
if len(vs) != 1 {
// this can happen in the initialization phase
@@ -1281,7 +1281,7 @@ func (as *authStore) WithRoot(ctx context.Context) context.Context {
func (as *authStore) HasRole(user, role string) bool {
tx := as.be.BatchTx()
tx.Lock()
tx.LockInsideApply()
u := getUser(as.lg, tx, user)
tx.Unlock()