From 8bd884e4a01012577b8859ed8c3a19b0f5b8883c Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 30 Dec 2021 22:39:04 +0800 Subject: [PATCH 1/3] ddl, types: convert the binary default value to proper encoding --- ddl/ddl_api.go | 23 ++++++++++------ ddl/integration_test.go | 60 +++++++++++++++++++++++++++++++++++++++++ types/datum.go | 3 +++ 3 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 ddl/integration_test.go diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index 128ad9b2e43f2..1406cbc3bc9d1 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -902,13 +902,20 @@ func getDefaultValue(ctx sessionctx.Context, col *table.Column, c *ast.ColumnOpt } if v.Kind() == types.KindBinaryLiteral || v.Kind() == types.KindMysqlBit { - if tp == mysql.TypeBit || - tp == mysql.TypeString || tp == mysql.TypeVarchar || tp == mysql.TypeVarString || - tp == mysql.TypeBlob || tp == mysql.TypeLongBlob || tp == mysql.TypeMediumBlob || tp == mysql.TypeTinyBlob || - tp == mysql.TypeJSON || tp == mysql.TypeEnum || tp == mysql.TypeSet { - // For BinaryLiteral / string fields, when getting default value we cast the value into BinaryLiteral{}, thus we return - // its raw string content here. - return v.GetBinaryLiteral().ToString(), false, nil + if types.IsTypeBlob(tp) || tp == mysql.TypeJSON { + // BLOB/TEXT/JSON column cannot have a default value. + // Skip the unnecessary decode procedure. + return v.GetString(), false, err + } + if tp == mysql.TypeBit || tp == mysql.TypeString || tp == mysql.TypeVarchar || + tp == mysql.TypeVarString || tp == mysql.TypeEnum || tp == mysql.TypeSet { + // For BinaryLiteral or bit fields, we decode the default value to utf8 string. + str, err := v.GetBinaryStringDecoded(nil, col.Charset) + if err != nil { + // Overwrite the decoding error with invalid default value error. + err = ErrInvalidDefaultValue.GenWithStackByArgs(col.Name.O) + } + return str, false, err } // For other kind of fields (e.g. INT), we supply its integer as string value. value, err := v.GetBinaryLiteral().ToInt(ctx.GetSessionVars().StmtCtx) @@ -3937,7 +3944,7 @@ func setDefaultValue(ctx sessionctx.Context, col *table.Column, option *ast.Colu hasDefaultValue := false value, isSeqExpr, err := getDefaultValue(ctx, col, option) if err != nil { - return hasDefaultValue, errors.Trace(err) + return false, errors.Trace(err) } if isSeqExpr { if err := checkSequenceDefaultValue(col); err != nil { diff --git a/ddl/integration_test.go b/ddl/integration_test.go new file mode 100644 index 0000000000000..1fdc6df425a09 --- /dev/null +++ b/ddl/integration_test.go @@ -0,0 +1,60 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl_test + +import ( + "fmt" + "testing" + + "github.com/pingcap/tidb/testkit" + "github.com/pingcap/tidb/util/collate" +) + +func TestDefaultValueIsBinaryString(t *testing.T) { + collate.SetCharsetFeatEnabledForTest(true) + defer collate.SetCharsetFeatEnabledForTest(false) + store, clean := testkit.CreateMockStore(t) + defer clean() + tests := []struct { + colTp string + defVal string + result string + }{ + {"char(10) charset gbk", "0xC4E3BAC3", "你好"}, + {"varchar(10) charset gbk", "0xC4E3BAC3", "你好"}, + {"char(10) charset utf8mb4", "0xE4BDA0E5A5BD", "你好"}, + {"char(10) charset utf8mb4", "0b111001001011100010010110111001111001010110001100", "世界"}, + {"bit(48)", "0xE4BDA0E5A5BD", "你好"}, + {"enum('你好')", "0xE4BDA0E5A5BD", "你好"}, + {"set('你好')", "0xE4BDA0E5A5BD", "你好"}, + } + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test;") + for _, tt := range tests { + tk.MustExec("drop table if exists t;") + template := "create table t (a %s default %s);" + tk.MustExec(fmt.Sprintf(template, tt.colTp, tt.defVal)) + tk.MustExec("insert into t values (default);") + tk.MustQuery("select a from t;").Check(testkit.Rows(tt.result)) + } + + // Test invalid default value. + tk.MustExec("drop table if exists t;") + // 0xE4BDA0E5A5BD81 is an invalid utf-8 string. + tk.MustGetErrMsg("create table t (a char(20) charset utf8mb4 default 0xE4BDA0E5A5BD81);", + "[ddl:1067]Invalid default value for 'a'") + tk.MustGetErrMsg("create table t (a blob default 0xE4BDA0E5A5BD81);", + "[ddl:1101]BLOB/TEXT/JSON column 'a' can't have a default value") +} diff --git a/types/datum.go b/types/datum.go index 0ca7222c6fc22..68b76c7bfdca0 100644 --- a/types/datum.go +++ b/types/datum.go @@ -222,6 +222,9 @@ func (d *Datum) GetStringWithCheck(sc *stmtctx.StatementContext, chs string) (st func findEncoding(sc *stmtctx.StatementContext, chs string) (enc charset.Encoding, skip bool) { enc = charset.FindEncoding(chs) + if sc == nil { + return enc, false + } if enc.Tp() == charset.EncodingTpUTF8 && sc.SkipUTF8Check || enc.Tp() == charset.EncodingTpASCII && sc.SkipASCIICheck { return nil, true From 39cd5d1173006a53e103aeec1f04d40a069b1160 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 30 Dec 2021 22:42:21 +0800 Subject: [PATCH 2/3] remove unnecessary change --- types/datum.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/types/datum.go b/types/datum.go index 68b76c7bfdca0..0ca7222c6fc22 100644 --- a/types/datum.go +++ b/types/datum.go @@ -222,9 +222,6 @@ func (d *Datum) GetStringWithCheck(sc *stmtctx.StatementContext, chs string) (st func findEncoding(sc *stmtctx.StatementContext, chs string) (enc charset.Encoding, skip bool) { enc = charset.FindEncoding(chs) - if sc == nil { - return enc, false - } if enc.Tp() == charset.EncodingTpUTF8 && sc.SkipUTF8Check || enc.Tp() == charset.EncodingTpASCII && sc.SkipASCIICheck { return nil, true From 41b5313a08ccffaba765fa57c5732de349ed0bc3 Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 31 Dec 2021 10:58:01 +0800 Subject: [PATCH 3/3] address comment and fix nil pointer --- ddl/integration_test.go | 1 + types/datum.go | 3 +++ 2 files changed, 4 insertions(+) diff --git a/ddl/integration_test.go b/ddl/integration_test.go index 1fdc6df425a09..fbd4a321c77a0 100644 --- a/ddl/integration_test.go +++ b/ddl/integration_test.go @@ -33,6 +33,7 @@ func TestDefaultValueIsBinaryString(t *testing.T) { result string }{ {"char(10) charset gbk", "0xC4E3BAC3", "你好"}, + {"char(10) charset gbk", "'好'", "好"}, {"varchar(10) charset gbk", "0xC4E3BAC3", "你好"}, {"char(10) charset utf8mb4", "0xE4BDA0E5A5BD", "你好"}, {"char(10) charset utf8mb4", "0b111001001011100010010110111001111001010110001100", "世界"}, diff --git a/types/datum.go b/types/datum.go index 0ca7222c6fc22..68b76c7bfdca0 100644 --- a/types/datum.go +++ b/types/datum.go @@ -222,6 +222,9 @@ func (d *Datum) GetStringWithCheck(sc *stmtctx.StatementContext, chs string) (st func findEncoding(sc *stmtctx.StatementContext, chs string) (enc charset.Encoding, skip bool) { enc = charset.FindEncoding(chs) + if sc == nil { + return enc, false + } if enc.Tp() == charset.EncodingTpUTF8 && sc.SkipUTF8Check || enc.Tp() == charset.EncodingTpASCII && sc.SkipASCIICheck { return nil, true