From 2ea253e3180f53e700e76137a30e4a726c9936d7 Mon Sep 17 00:00:00 2001 From: Lynn Date: Wed, 7 Dec 2022 15:22:04 +0800 Subject: [PATCH] ddl: handle ErrNotOwner properly for modify column (#39645) close pingcap/tidb#39643, close pingcap/tidb#39644 --- ddl/backfilling.go | 6 ++++++ ddl/column.go | 3 ++- ddl/index.go | 2 +- ddl/modify_column_test.go | 5 +++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 76e1fc2aaf5fd..f375c503e61fc 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -467,6 +467,12 @@ func (dc *ddlCtx) sendTasksAndWait(scheduler *backfillScheduler, totalAddedCount zap.String("task failed error", err.Error()), zap.String("take time", elapsedTime.String()), zap.NamedError("updateHandleError", err1)) + failpoint.Inject("MockGetIndexRecordErr", func() { + // Make sure this job didn't fail because of the "Write conflict" error. + if dbterror.ErrNotOwner.Equal(err) { + time.Sleep(50 * time.Millisecond) + } + }) return errors.Trace(err) } diff --git a/ddl/column.go b/ddl/column.go index 9f5174737915a..e9c353aacf2f5 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -834,7 +834,7 @@ func doReorgWorkForModifyColumn(w *worker, d *ddlCtx, t *meta.Meta, job *model.J // If timeout, we should return, check for the owner and re-wait job done. return false, ver, nil } - if kv.IsTxnRetryableError(err) { + if kv.IsTxnRetryableError(err) || dbterror.ErrNotOwner.Equal(err) { return false, ver, errors.Trace(err) } if err1 := rh.RemoveDDLReorgHandle(job, reorgInfo.elements); err1 != nil { @@ -1124,6 +1124,7 @@ func (w *worker) updateCurrentElement(t table.Table, reorgInfo *reorgInfo) error // Then the handle range of the rest elements' is [originalStartHandle, originalEndHandle]. 
if i == startElementOffsetToResetHandle+1 { reorgInfo.StartKey, reorgInfo.EndKey = originalStartHandle, originalEndHandle + w.getReorgCtx(reorgInfo.Job).setNextKey(reorgInfo.StartKey) } // Update the element in the reorgCtx to keep the atomic access for daemon-worker. diff --git a/ddl/index.go b/ddl/index.go index 9daa30fe93370..f6214a7d03e8a 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1256,7 +1256,7 @@ func (w *baseIndexWorker) getIndexRecord(idxInfo *model.IndexInfo, handle kv.Han failpoint.Return(nil, errors.Trace(dbterror.ErrCantDecodeRecord.GenWithStackByArgs("index", errors.New("mock can't decode record error")))) case "modifyColumnNotOwnerErr": - if idxInfo.Name.O == "_Idx$_idx" && handle.IntValue() == 7168 && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 0, 1) { + if idxInfo.Name.O == "_Idx$_idx_0" && handle.IntValue() == 7168 && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 0, 1) { failpoint.Return(nil, errors.Trace(dbterror.ErrNotOwner)) } case "addIdxNotOwnerErr": diff --git a/ddl/modify_column_test.go b/ddl/modify_column_test.go index b28a503ee4f79..bd9c574970f71 100644 --- a/ddl/modify_column_test.go +++ b/ddl/modify_column_test.go @@ -50,6 +50,11 @@ func batchInsert(tk *testkit.TestKit, tbl string, start, end int) { func TestModifyColumnReorgInfo(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) + originalTimeout := ddl.ReorgWaitTimeout + ddl.ReorgWaitTimeout = 10 * time.Millisecond + defer func() { + ddl.ReorgWaitTimeout = originalTimeout + }() tk := testkit.NewTestKit(t, store) tk.MustExec("use test") tk.MustExec("drop table if exists t1")