Skip to content

Commit

Permalink
cherry pick pingcap#35053 to release-6.0
Browse files Browse the repository at this point in the history
Signed-off-by: ti-srebot <ti-srebot@pingcap.com>
  • Loading branch information
Willendless authored and ti-srebot committed Jun 21, 2022
1 parent 36a9810 commit 15fbaa3
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 0 deletions.
25 changes: 25 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation_disabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,29 @@ select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27
col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
25 changes: 25 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,29 @@ col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
89
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
23 changes: 23 additions & 0 deletions cmd/explaintest/t/collation_check_use_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,28 @@ insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076
select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);

# check implicit binary collation cast
drop table if exists t1;
drop table if exists t2;
# issue 34823
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
# binary collation in single side
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);

# cleanup environment
use test
10 changes: 10 additions & 0 deletions expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1840,6 +1841,15 @@ func BuildCastCollationFunction(ctx sessionctx.Context, expr Expression, ec *Exp
} else {
return expr
}
} else if ec.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/issues/34823.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// e.g. https://github.com/pingcap/tidb/pull/35053#discussion_r894155052
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.Charset, tp.Collate = ec.Charset, ec.Collation
newExpr := BuildCastFunction(ctx, expr, tp)
Expand Down
9 changes: 9 additions & 0 deletions planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,7 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
return
}
for i := stkLen - elemCnt; i < stkLen; i++ {
// todo: consider refining the code and reusing expression.BuildCollationFunction here
if er.ctxStack[i].GetType().EvalType() == types.ETString {
rowFunc, ok := er.ctxStack[i].(*expression.ScalarFunction)
if ok && rowFunc.FuncName.String() == ast.RowFunc {
Expand All @@ -1560,6 +1561,14 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
} else {
continue
}
} else if coll.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/pull/35053#pullrequestreview-1008757770 gives an unexpected case.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.Charset, tp.Collate = coll.Charset, coll.Collation
er.ctxStack[i] = expression.BuildCastFunction(er.sctx, er.ctxStack[i], tp)
Expand Down

0 comments on commit 15fbaa3

Please sign in to comment.