From b2d43dd56ffee693449943a3b070e20800da7960 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Wed, 21 Jun 2023 17:43:24 +0530 Subject: [PATCH 1/2] feat: move storing the lock action outside Signed-off-by: Manan Gupta --- go/vt/vtctl/reparentutil/emergency_reparenter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go index 536d77bdaad..ae0a4766ada 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go @@ -97,10 +97,10 @@ func NewEmergencyReparenter(ts *topo.Server, tmc tmclient.TabletManagerClient, l // keyspace and shard. func (erp *EmergencyReparenter) ReparentShard(ctx context.Context, keyspace string, shard string, opts EmergencyReparentOptions) (*events.Reparent, error) { var err error + opts.lockAction = erp.getLockAction(opts.NewPrimaryAlias) // First step is to lock the shard for the given operation, if not already locked if err = topo.CheckShardLocked(ctx, keyspace, shard); err != nil { var unlock func(*error) - opts.lockAction = erp.getLockAction(opts.NewPrimaryAlias) ctx, unlock, err = erp.ts.LockShard(ctx, keyspace, shard, opts.lockAction) if err != nil { return nil, err From e5a371791e6586d856c30a57eff0580e07f2d9da Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Wed, 21 Jun 2023 18:33:28 +0530 Subject: [PATCH 2/2] feat: improve shard lock action Signed-off-by: Manan Gupta --- go/vt/vtorc/logic/tablet_discovery.go | 13 ++++++++---- go/vt/vtorc/logic/tablet_discovery_test.go | 24 ++++++++++++++++++++++ go/vt/vtorc/logic/topology_recovery.go | 2 +- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/go/vt/vtorc/logic/tablet_discovery.go b/go/vt/vtorc/logic/tablet_discovery.go index 30827036044..b8853ae07e8 100644 --- a/go/vt/vtorc/logic/tablet_discovery.go +++ b/go/vt/vtorc/logic/tablet_discovery.go @@ -19,6 +19,7 @@ package logic import ( "context" "errors" + "fmt" "strings" "sync" "sync/atomic" @@ -252,14 +253,18 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an } } +func getLockAction(analysedInstance string, code inst.AnalysisCode) string { + return fmt.Sprintf("VTOrc Recovery for %v on %v", code, analysedInstance) +} + // LockShard locks the keyspace-shard preventing others from performing conflicting actions. -func LockShard(ctx context.Context, tabletAlias string) (context.Context, func(*error), error) { +func LockShard(ctx context.Context, tabletAlias string, lockAction string) (context.Context, func(*error), error) { if tabletAlias == "" { - return nil, nil, errors.New("Can't lock shard: instance is unspecified") + return nil, nil, errors.New("can't lock shard: instance is unspecified") } val := atomic.LoadInt32(&hasReceivedSIGTERM) if val > 0 { - return nil, nil, errors.New("Can't lock shard: SIGTERM received") + return nil, nil, errors.New("can't lock shard: SIGTERM received") } tablet, err := inst.ReadTablet(tabletAlias) @@ -268,7 +273,7 @@ func LockShard(ctx context.Context, tabletAlias string) (context.Context, func(* } atomic.AddInt32(&shardsLockCounter, 1) - ctx, unlock, err := ts.TryLockShard(ctx, tablet.Keyspace, tablet.Shard, "Orc Recovery") + ctx, unlock, err := ts.TryLockShard(ctx, tablet.Keyspace, tablet.Shard, lockAction) if err != nil { atomic.AddInt32(&shardsLockCounter, -1) return nil, nil, err diff --git a/go/vt/vtorc/logic/tablet_discovery_test.go b/go/vt/vtorc/logic/tablet_discovery_test.go index 1166dd2e40d..65e263b19a9 100644 --- a/go/vt/vtorc/logic/tablet_discovery_test.go +++ b/go/vt/vtorc/logic/tablet_discovery_test.go @@ -18,6 +18,7 @@ package logic import ( "context" + "fmt" "sync/atomic" "testing" @@ -307,3 +308,26 @@ func verifyTabletCount(t *testing.T, countWanted int) { require.NoError(t, err) require.Equal(t, countWanted, totalTablets) } + +func TestGetLockAction(t *testing.T) { + tests := []struct { + analysedInstance string + code inst.AnalysisCode + want string + }{ + { + analysedInstance: "zone1-100", + code: inst.DeadPrimary, + want: "VTOrc Recovery for DeadPrimary on zone1-100", + }, { + analysedInstance: "zone1-200", + code: inst.ReplicationStopped, + want: "VTOrc Recovery for ReplicationStopped on zone1-200", + }, + } + for _, tt := range tests { + t.Run(fmt.Sprintf("%v-%v", tt.analysedInstance, tt.code), func(t *testing.T) { + require.Equal(t, tt.want, getLockAction(tt.analysedInstance, tt.code)) + }) + } +} diff --git a/go/vt/vtorc/logic/topology_recovery.go b/go/vt/vtorc/logic/topology_recovery.go index 611636c6e20..1a4eecf71ad 100644 --- a/go/vt/vtorc/logic/topology_recovery.go +++ b/go/vt/vtorc/logic/topology_recovery.go @@ -593,7 +593,7 @@ func executeCheckAndRecoverFunction(analysisEntry inst.ReplicationAnalysis) (err } // We lock the shard here and then refresh the tablets information - ctx, unlock, err := LockShard(context.Background(), analysisEntry.AnalyzedInstanceAlias) + ctx, unlock, err := LockShard(context.Background(), analysisEntry.AnalyzedInstanceAlias, getLockAction(analysisEntry.AnalyzedInstanceAlias, analysisEntry.Analysis)) if err != nil { return err }