Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#48984
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
time-and-fate authored and ti-chi-bot committed Nov 30, 2023
1 parent 069631e commit f82197f
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@
},
{
"SQL": "select a from t where c_str like 'abc_'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc%af'",
Expand Down Expand Up @@ -2223,7 +2223,7 @@
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
Expand Down
13 changes: 13 additions & 0 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,19 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
if err != nil {
return false, true
}
<<<<<<< HEAD
=======
likeFuncReserve := !c.isFullLengthColumn()

// Different from `=`, trailing spaces are always significant, and can't be ignored in `like`.
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
if isPadSpaceCollation(collation) {
likeFuncReserve = true
}

>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
if len(patternStr) == 0 {
return true, !c.isFullLengthColumn()
}
Expand Down
25 changes: 22 additions & 3 deletions pkg/util/ranger/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -678,9 +679,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
break
} else if pattern[i] == '_' {
// Get the prefix, but exclude the prefix.
// e.g., "abc_x", the start point exclude "abc",
// because the string length is more than 3.
exclude = true
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
//
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
// we will actually miss 'xxx ', which will cause wrong results.
if !isPadSpaceCollation(collation) {
exclude = true
}
isExactMatch = false
break
}
Expand Down Expand Up @@ -715,7 +722,19 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
return []*point{startPoint, endPoint}
}

<<<<<<< HEAD
func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
=======
// isPadSpaceCollation reports whether the named collation is treated as a PAD SPACE collation.
//
// The check is intentionally minimal: only the binary collation is reported as not being PAD SPACE, and every
// other collation name yields true. This relies on all non-binary collations implemented in tidb being PAD SPACE
// for now. Once NO PAD collations are implemented, this helper should be revisited and may be moved to the
// collation related packages.
func isPadSpaceCollation(collation string) bool {
	isBinary := collation == charset.CollationBin
	return !isBinary
}


func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
switch n := expr.FuncName.L; n {
case ast.IsTruthWithoutNull:
return r.buildFromIsTrue(expr, 1, false)
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ create table t(
exprStr: "a LIKE 'abc_'",
accessConds: "[like(test.t.a, abc_, 92)]",
filterConds: "[like(test.t.a, abc_, 92)]",
resultStr: "[(\"abc\",\"abd\")]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
indexPos: 0,
Expand Down
136 changes: 136 additions & 0 deletions tests/integrationtest/r/planner/core/issuetest/planner_issue.result
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,139 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
id id
1 1
1 1
<<<<<<< HEAD
=======
drop table if exists t;
create table t(a int, b int);
set @@tidb_max_chunk_size = 32;
insert into t values(1, 1);
insert into t select a+1, a+1 from t;
insert into t select a+2, a+2 from t;
insert into t select a+4, a+4 from t;
insert into t select a+8, a+8 from t;
insert into t select a+16, a+16 from t;
insert into t select a+32, a+32 from t;
select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
a
100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
set @@tidb_max_chunk_size = default;
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
explain format = brief select *,length(a) from t1 where a like '测试';
id estRows task access object operator info
Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 10.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexReader 10.00 root index:Selection
└─Selection 10.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
select *,length(a) from t1 where a like '测试 %';
a length(a)
测试 8
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
select * from t1 use index (ia) where a like 'xxx_';
a
xxx
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
explain format = brief select *,length(a) from t2 where a like '测试';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
select *,length(a) from t2 where a like '测试 %';
a length(a)
测试 6
select *,length(a) from t2 where a like '测试';
a length(a)
测试 4
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
36 changes: 36 additions & 0 deletions tests/integrationtest/t/planner/core/issuetest/planner_issue.test
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,39 @@ FROM
t2 db
LEFT JOIN tmp3 c2 ON c2.id = '1'
LEFT JOIN tmp3 c3 ON c3.id = '1';
<<<<<<< HEAD
=======

# https://github.com/pingcap/tidb/issues/48755
drop table if exists t;
create table t(a int, b int);
set @@tidb_max_chunk_size = 32;
# insert more than 32 rows into the table.
insert into t values(1, 1);
insert into t select a+1, a+1 from t;
insert into t select a+2, a+2 from t;
insert into t select a+4, a+4 from t;
insert into t select a+8, a+8 from t;
insert into t select a+16, a+16 from t;
insert into t select a+32, a+32 from t;
select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
set @@tidb_max_chunk_size = default;

# https://github.com/pingcap/tidb/issues/48821
# https://github.com/pingcap/tidb/issues/48983
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
explain format = brief select *,length(a) from t1 where a like '测试';
select *,length(a) from t1 where a like '测试 %';
select *,length(a) from t1 where a like '测试';
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
select * from t1 use index (ia) where a like 'xxx_';
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
explain format = brief select *,length(a) from t2 where a like '测试';
select *,length(a) from t2 where a like '测试 %';
select *,length(a) from t2 where a like '测试';
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))

0 comments on commit f82197f

Please sign in to comment.