This is an automated cherry-pick of pingcap#48984

Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
ti-chi-bot · Nov 30, 2023 · f82197f · f82197f
1 parent 069631e
commit f82197f
Show file tree

Hide file tree

Showing 6 changed files with 210 additions and 6 deletions.
diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
@@ -2195,7 +2195,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc%af'",
@@ -2223,7 +2223,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc\\__'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 123",

diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go
@@ -166,6 +166,19 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
 	if err != nil {
 		return false, true
 	}
+<<<<<<< HEAD
+=======
+	likeFuncReserve := !c.isFullLengthColumn()
+
+	// Different from `=`, trailing spaces are always significant, and can't be ignored in `like`.
+	// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
+	// unable to distinguish 'xxx' from 'xxx   ' by a single index range scan, and we may read more data than needed by
+	// the `like` function. Therefore, a Selection is needed to filter the data.
+	if isPadSpaceCollation(collation) {
+		likeFuncReserve = true
+	}
+
+>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
 	if len(patternStr) == 0 {
 		return true, !c.isFullLengthColumn()
 	}

diff --git a/pkg/util/ranger/points.go b/pkg/util/ranger/points.go
@@ -24,6 +24,7 @@ import (
 	"github.com/pingcap/tidb/pkg/errno"
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
+	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/pkg/types"
@@ -678,9 +679,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
 			break
 		} else if pattern[i] == '_' {
 			// Get the prefix, but exclude the prefix.
-			// e.g., "abc_x", the start point exclude "abc",
-			// because the string length is more than 3.
-			exclude = true
+			// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
+			//
+			// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
+			// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
+			// 'xxx' from 'xxx   ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
+			// we will actually miss 'xxx   ', which will cause wrong results.
+			if !isPadSpaceCollation(collation) {
+				exclude = true
+			}
 			isExactMatch = false
 			break
 		}
@@ -715,7 +722,19 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
 	return []*point{startPoint, endPoint}
 }
 
+<<<<<<< HEAD
 func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
+=======
+// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
+// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
+// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
+// implemented in the future.
+func isPadSpaceCollation(collation string) bool {
+	return collation != charset.CollationBin
+}
+
+func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
+>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
 	switch n := expr.FuncName.L; n {
 	case ast.IsTruthWithoutNull:
 		return r.buildFromIsTrue(expr, 1, false)

diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go
@@ -1106,7 +1106,7 @@ create table t(
 			exprStr:     "a LIKE 'abc_'",
 			accessConds: "[like(test.t.a, abc_, 92)]",
 			filterConds: "[like(test.t.a, abc_, 92)]",
-			resultStr:   "[(\"abc\",\"abd\")]",
+			resultStr:   "[[\"abc\",\"abd\")]",
 		},
 		{
 			indexPos:    0,

diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
@@ -180,3 +180,139 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
 id	id
 1	1
 1	1
+<<<<<<< HEAD
+=======
+drop table if exists t;
+create table t(a int, b int);
+set @@tidb_max_chunk_size = 32;
+insert into t values(1, 1);
+insert into t select a+1, a+1 from t;
+insert into t select a+2, a+2 from t;
+insert into t select a+4, a+4 from t;
+insert into t select a+8, a+8 from t;
+insert into t select a+16, a+16 from t;
+insert into t select a+32, a+32 from t;
+select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
+a
+100
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+set @@tidb_max_chunk_size = default;
+drop table if exists t1, t2;
+create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
+insert into t1 value('测试'),('测试  '),('xxx ');
+explain format = brief select *,length(a) from t1 where a like '测试 %';
+id	estRows	task	access object	operator info
+Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
+└─UnionScan	250.00	root		like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
+  └─IndexReader	250.00	root		index:Selection
+    └─Selection	250.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
+      └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["测试 ","测试!"), keep order:false, stats:pseudo
+explain format = brief select *,length(a) from t1 where a like '测试';
+id	estRows	task	access object	operator info
+Projection	10.00	root		planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
+└─UnionScan	10.00	root		like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
+  └─IndexReader	10.00	root		index:Selection
+    └─Selection	10.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
+      └─IndexRangeScan	10.00	cop[tikv]	table:t1, index:ia(a)	range:["测试","测试"], keep order:false, stats:pseudo
+select *,length(a) from t1 where a like '测试 %';
+a	length(a)
+测试  	8
+select *,length(a) from t1 where a like '测试';
+a	length(a)
+测试	6
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+id	estRows	task	access object	operator info
+Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a
+└─UnionScan	250.00	root		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+  └─IndexReader	250.00	root		index:Selection
+    └─Selection	250.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+      └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["xxx","xxy"), keep order:false, stats:pseudo
+select * from t1 use index (ia) where a like 'xxx_';
+a
+xxx 
+create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
+insert into t2 value('测试'),('测试  ');
+explain format = brief select *,length(a) from t2 where a like '测试 %';
+id	estRows	task	access object	operator info
+Projection	8000.00	root		planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
+└─UnionScan	8000.00	root		like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
+  └─TableReader	8000.00	root		data:Selection
+    └─Selection	8000.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
+      └─TableFullScan	10000.00	cop[tikv]	table:t2	keep order:false, stats:pseudo
+explain format = brief select *,length(a) from t2 where a like '测试';
+id	estRows	task	access object	operator info
+Projection	8000.00	root		planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
+└─UnionScan	8000.00	root		like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
+  └─TableReader	8000.00	root		data:Selection
+    └─Selection	8000.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
+      └─TableFullScan	10000.00	cop[tikv]	table:t2	keep order:false, stats:pseudo
+select *,length(a) from t2 where a like '测试 %';
+a	length(a)
+测试  	6
+select *,length(a) from t2 where a like '测试';
+a	length(a)
+测试	4
+>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
@@ -136,3 +136,39 @@ FROM
   t2 db 
   LEFT JOIN tmp3 c2 ON c2.id = '1' 
   LEFT JOIN tmp3 c3 ON c3.id = '1';
+<<<<<<< HEAD
+=======
+
+# https://github.com/pingcap/tidb/issues/48755
+drop table if exists t;
+create table t(a int, b int);
+set @@tidb_max_chunk_size = 32;
+# insert into more than 32 rows to the table.
+insert into t values(1, 1);
+insert into t select a+1, a+1 from t;
+insert into t select a+2, a+2 from t;
+insert into t select a+4, a+4 from t;
+insert into t select a+8, a+8 from t;
+insert into t select a+16, a+16 from t;
+insert into t select a+32, a+32 from t;
+select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
+set @@tidb_max_chunk_size = default;
+
+# https://github.com/pingcap/tidb/issues/48821
+# https://github.com/pingcap/tidb/issues/48983
+drop table if exists t1, t2;
+create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
+insert into t1 value('测试'),('测试  '),('xxx ');
+explain format = brief select *,length(a) from t1 where a like '测试 %';
+explain format = brief select *,length(a) from t1 where a like '测试';
+select *,length(a) from t1 where a like '测试 %';
+select *,length(a) from t1 where a like '测试';
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+select * from t1 use index (ia) where a like 'xxx_';
+create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
+insert into t2 value('测试'),('测试  ');
+explain format = brief select *,length(a) from t2 where a like '测试 %';
+explain format = brief select *,length(a) from t2 where a like '测试';
+select *,length(a) from t2 where a like '测试 %';
+select *,length(a) from t2 where a like '测试';
+>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))