From 35a19baed3c052ca357de317471d1ff9458c546c Mon Sep 17 00:00:00 2001 From: Jiarui Li <34512395+Willendless@users.noreply.github.com> Date: Tue, 21 Jun 2022 00:22:36 -0400 Subject: [PATCH] cherry pick #35053 to release-5.4 Signed-off-by: ti-srebot --- .../r/collation_check_use_collation.result | 25 +++ ...lation_check_use_collation_disabled.result | 156 ++++++++++++++++++ .../t/collation_check_use_collation.test | 23 +++ expression/builtin_cast.go | 10 ++ planner/core/expression_rewriter.go | 9 + 5 files changed, 223 insertions(+) create mode 100644 cmd/explaintest/r/collation_check_use_collation_disabled.result diff --git a/cmd/explaintest/r/collation_check_use_collation.result b/cmd/explaintest/r/collation_check_use_collation.result index d878d140f647e..16ecab59bd032 100644 --- a/cmd/explaintest/r/collation_check_use_collation.result +++ b/cmd/explaintest/r/collation_check_use_collation.result @@ -147,4 +147,29 @@ col_25 select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); col_25 89 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c use test diff --git a/cmd/explaintest/r/collation_check_use_collation_disabled.result b/cmd/explaintest/r/collation_check_use_collation_disabled.result new file mode 100644 index 0000000000000..06af2890faa8f --- /dev/null +++ b/cmd/explaintest/r/collation_check_use_collation_disabled.result @@ -0,0 +1,156 @@ +create database collation_check_use_collation; +use collation_check_use_collation; +CREATE TABLE `t` ( +`a` char(10) DEFAULT NULL +); +CREATE TABLE `t1` ( +`a` char(10) COLLATE utf8mb4_general_ci DEFAULT NULL +); +insert into t values ("A"); +insert into t1 values ("a"); +select a as a_col from t where t.a = all (select a collate utf8mb4_general_ci from t1); +a_col +select a as a_col from t where t.a != any (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a <= all (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a <= any (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a = (select a collate utf8mb4_general_ci from t1); +a_col +drop table if exists t; +create table t(a enum('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a enum('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +Error 1265: Data truncated for column 'a' at row 1 +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +select * from t where "b" between a and a; +a b +b c +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +select * from t where "b" between a and 3; +a b +drop table if exists t; +create table t(a set('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a set('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +Error 1265: Data truncated for column 'a' at row 1 +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +select * from t where "b" between a and a; +a b +b c +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +select * from t where "b" between a and 3; +a b +drop table if exists tbl_2; +create table tbl_2 ( col_20 bigint not null , col_21 smallint not null , col_22 decimal(24,10) default null , col_23 tinyint default 71 not null , col_24 bigint not null , col_25 tinyint default 18 , col_26 varchar(330) collate utf8_bin not null , col_27 char(77) collate utf8mb4_unicode_ci , col_28 char(46) collate utf8_general_ci not null , col_29 smallint unsigned not null , primary key idx_13 ( col_27(5) ) , key idx_14 ( col_24 ) , unique key idx_15 ( col_23,col_21,col_28,col_29,col_24 ) ) collate utf8_bin ; +insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076919247337,89,'WUicqUTgdGJcjbC','SapBPqczTWWSN','xUSwH',49462 ); +select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c +use test diff --git a/cmd/explaintest/t/collation_check_use_collation.test b/cmd/explaintest/t/collation_check_use_collation.test index 2d73a6afa5716..2f26f7969801b 100644 --- a/cmd/explaintest/t/collation_check_use_collation.test +++ b/cmd/explaintest/t/collation_check_use_collation.test @@ -80,5 +80,28 @@ insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076 select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +# check implicit binary collation cast +drop table if exists t1; +drop table if exists t2; +# issue 34823 +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +# binary collation in single side +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); + # cleanup environment use test diff --git a/expression/builtin_cast.go b/expression/builtin_cast.go index 5c3ee89df4c92..6fa6f36be5925 100644 --- a/expression/builtin_cast.go +++ b/expression/builtin_cast.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/terror" @@ -1842,6 +1843,15 @@ func BuildCastCollationFunction(ctx sessionctx.Context, expr Expression, ec *Exp } else { return expr } + } else if ec.Charset == charset.CharsetBin { + // When cast character string to binary string, if we still use fixed length representation, + // then 0 padding will be used, which can affect later execution. + // e.g. https://github.com/pingcap/tidb/issues/34823. + // On the other hand, we can not directly return origin expr back, + // since we need binary collation to do string comparison later. + // e.g. https://github.com/pingcap/tidb/pull/35053#discussion_r894155052 + // Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem. + tp.SetType(mysql.TypeVarString) } tp.Charset, tp.Collate = ec.Charset, ec.Collation newExpr := BuildCastFunction(ctx, expr, tp) diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 91ac0dfdc1b2f..b36192e113a1b 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1544,6 +1544,7 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen return } for i := stkLen - elemCnt; i < stkLen; i++ { + // todo: consider refining the code and reusing expression.BuildCollationFunction here if er.ctxStack[i].GetType().EvalType() == types.ETString { rowFunc, ok := er.ctxStack[i].(*expression.ScalarFunction) if ok && rowFunc.FuncName.String() == ast.RowFunc { @@ -1560,6 +1561,14 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen } else { continue } + } else if coll.Charset == charset.CharsetBin { + // When cast character string to binary string, if we still use fixed length representation, + // then 0 padding will be used, which can affect later execution. + // e.g. https://github.com/pingcap/tidb/pull/35053#pullrequestreview-1008757770 gives an unexpected case. + // On the other hand, we can not directly return origin expr back, + // since we need binary collation to do string comparison later. + // Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem. + tp.SetType(mysql.TypeVarString) } tp.Charset, tp.Collate = coll.Charset, coll.Collation er.ctxStack[i] = expression.BuildCastFunction(er.sctx, er.ctxStack[i], tp)