Skip to content

Commit

Permalink
cherry pick pingcap#35053 to release-5.4
Browse files Browse the repository at this point in the history
Signed-off-by: ti-srebot <ti-srebot@pingcap.com>
  • Loading branch information
Willendless authored and ti-srebot committed Jun 21, 2022
1 parent 4d888ce commit 35a19ba
Show file tree
Hide file tree
Showing 5 changed files with 223 additions and 0 deletions.
25 changes: 25 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation.result
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,29 @@ col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
89
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
156 changes: 156 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation_disabled.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
create database collation_check_use_collation;
use collation_check_use_collation;
CREATE TABLE `t` (
`a` char(10) DEFAULT NULL
);
CREATE TABLE `t1` (
`a` char(10) COLLATE utf8mb4_general_ci DEFAULT NULL
);
insert into t values ("A");
insert into t1 values ("a");
select a as a_col from t where t.a = all (select a collate utf8mb4_general_ci from t1);
a_col
select a as a_col from t where t.a != any (select a collate utf8mb4_general_ci from t1);
a_col
A
select a as a_col from t where t.a <= all (select a collate utf8mb4_general_ci from t1);
a_col
A
select a as a_col from t where t.a <= any (select a collate utf8mb4_general_ci from t1);
a_col
A
select a as a_col from t where t.a = (select a collate utf8mb4_general_ci from t1);
a_col
drop table if exists t;
create table t(a enum('a', 'b'), b varchar(20));
insert into t values ("a", "b");
select * from t where a in (a);
a b
a b
drop table if exists t;
create table t(a enum('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20));
insert into t values ("b", "c");
insert into t values ("B", "b");
Error 1265: Data truncated for column 'a' at row 1
select * from t where 'B' collate utf8mb4_general_ci in (a);
a b
select * from t where 'B' collate utf8mb4_bin in (a);
a b
select * from t where 'B' collate utf8mb4_bin in (a, b);
a b
select * from t where 'B' collate utf8mb4_bin in (a, "a", 1);
a b
select * from t where 'B' collate utf8mb4_bin in (a, "B", 1);
a b
b c
select * from t where 1 in (a);
a b
select * from t where 2 in (a);
a b
b c
select * from t where 1 in (a, 0);
a b
select * from t where a between 1 and 2;
a b
b c
select * from t where a between 1 and "a";
a b
select * from t where a between "a" and "b";
a b
b c
select * from t where 2 between a and "c";
a b
select * from t where 2 between a and 3;
a b
b c
select * from t where "b" between a and a;
a b
b c
select * from t where "b" collate utf8mb4_bin between a and a;
a b
b c
select * from t where "b" between a and 3;
a b
drop table if exists t;
create table t(a set('a', 'b'), b varchar(20));
insert into t values ("a", "b");
select * from t where a in (a);
a b
a b
drop table if exists t;
create table t(a set('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20));
insert into t values ("b", "c");
insert into t values ("B", "b");
Error 1265: Data truncated for column 'a' at row 1
select * from t where 'B' collate utf8mb4_general_ci in (a);
a b
select * from t where 'B' collate utf8mb4_bin in (a);
a b
select * from t where 'B' collate utf8mb4_bin in (a, b);
a b
select * from t where 'B' collate utf8mb4_bin in (a, "a", 1);
a b
select * from t where 'B' collate utf8mb4_bin in (a, "B", 1);
a b
b c
select * from t where 1 in (a);
a b
select * from t where 2 in (a);
a b
b c
select * from t where 1 in (a, 0);
a b
select * from t where a between 1 and 2;
a b
b c
select * from t where a between 1 and "a";
a b
select * from t where a between "a" and "b";
a b
b c
select * from t where 2 between a and "c";
a b
select * from t where 2 between a and 3;
a b
b c
select * from t where "b" between a and a;
a b
b c
select * from t where "b" collate utf8mb4_bin between a and a;
a b
b c
select * from t where "b" between a and 3;
a b
drop table if exists tbl_2;
create table tbl_2 ( col_20 bigint not null , col_21 smallint not null , col_22 decimal(24,10) default null , col_23 tinyint default 71 not null , col_24 bigint not null , col_25 tinyint default 18 , col_26 varchar(330) collate utf8_bin not null , col_27 char(77) collate utf8mb4_unicode_ci , col_28 char(46) collate utf8_general_ci not null , col_29 smallint unsigned not null , primary key idx_13 ( col_27(5) ) , key idx_14 ( col_24 ) , unique key idx_15 ( col_23,col_21,col_28,col_29,col_24 ) ) collate utf8_bin ;
insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076919247337,89,'WUicqUTgdGJcjbC','SapBPqczTWWSN','xUSwH',49462 );
select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
23 changes: 23 additions & 0 deletions cmd/explaintest/t/collation_check_use_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,28 @@ insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076
select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);

# check implicit binary collation cast
drop table if exists t1;
drop table if exists t2;
# issue 34823
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
# binary collation in single side
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);

# cleanup environment
use test
10 changes: 10 additions & 0 deletions expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1842,6 +1843,15 @@ func BuildCastCollationFunction(ctx sessionctx.Context, expr Expression, ec *Exp
} else {
return expr
}
} else if ec.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/issues/34823.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// e.g. https://github.com/pingcap/tidb/pull/35053#discussion_r894155052
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.Charset, tp.Collate = ec.Charset, ec.Collation
newExpr := BuildCastFunction(ctx, expr, tp)
Expand Down
9 changes: 9 additions & 0 deletions planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,7 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
return
}
for i := stkLen - elemCnt; i < stkLen; i++ {
// todo: consider refining the code and reusing expression.BuildCollationFunction here
if er.ctxStack[i].GetType().EvalType() == types.ETString {
rowFunc, ok := er.ctxStack[i].(*expression.ScalarFunction)
if ok && rowFunc.FuncName.String() == ast.RowFunc {
Expand All @@ -1560,6 +1561,14 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
} else {
continue
}
} else if coll.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/pull/35053#pullrequestreview-1008757770 gives an unexpected case.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.Charset, tp.Collate = coll.Charset, coll.Collation
er.ctxStack[i] = expression.BuildCastFunction(er.sctx, er.ctxStack[i], tp)
Expand Down

0 comments on commit 35a19ba

Please sign in to comment.