pingcap · ti-chi-bot · Nov 24, 2023
diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
@@ -2171,11 +2171,11 @@
       },
       {
         "SQL": "select a from t where c_str like ''",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str not like 'abc'",
@@ -2191,7 +2191,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc%'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc_'",
@@ -2203,31 +2203,31 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc\\_' escape ''",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc\\_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc\\\\_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc\\_%'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc=_%' escape '='",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc\\__'",
         "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 123",
-        "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c = 1.9 and d > 3",

diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go
@@ -17,6 +17,7 @@ package ranger
 import (
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
+	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/types"
 	"github.com/pingcap/tidb/pkg/util/collate"
@@ -166,11 +167,22 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
 	if err != nil {
 		return false, true
 	}
+	likeFuncReserve := !c.isFullLengthColumn()
+
+	// Unlike `=`, trailing spaces are always significant in `like` and can't be ignored.
+	// In TiDB's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
+	// unable to distinguish 'xxx' from 'xxx   ' by a single index range scan, and we may read more data than needed by
+	// the `like` function. Therefore, a Selection is needed to filter the data.
+	// Since all collations implemented in TiDB, except for binary, are PAD SPACE collations for now, we use a simple
+	// `collation != binary` check here.
+	if collation != charset.CollationBin {
+		likeFuncReserve = true
+	}
+
 	if len(patternStr) == 0 {
-		return true, !c.isFullLengthColumn()
+		return true, likeFuncReserve
 	}
 	escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64())
-	likeFuncReserve := !c.isFullLengthColumn()
 	for i := 0; i < len(patternStr); i++ {
 		if patternStr[i] == escape {
 			i++

diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go
@@ -1098,7 +1098,7 @@ create table t(
 			indexPos:    0,
 			exprStr:     `a LIKE 'abc%'`,
 			accessConds: `[like(test.t.a, abc%, 92)]`,
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, abc%, 92)]",
 			resultStr:   "[[\"abc\",\"abd\")]",
 		},
 		{
@@ -1112,14 +1112,14 @@ create table t(
 			indexPos:    0,
 			exprStr:     "a LIKE 'abc'",
 			accessConds: "[like(test.t.a, abc, 92)]",
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, abc, 92)]",
 			resultStr:   "[[\"abc\",\"abc\"]]",
 		},
 		{
 			indexPos:    0,
 			exprStr:     `a LIKE "ab\_c"`,
 			accessConds: "[like(test.t.a, ab\\_c, 92)]",
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, ab\\_c, 92)]",
 			resultStr:   "[[\"ab_c\",\"ab_c\"]]",
 		},
 		{
@@ -1133,21 +1133,21 @@ create table t(
 			indexPos:    0,
 			exprStr:     `a LIKE '\%a'`,
 			accessConds: "[like(test.t.a, \\%a, 92)]",
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, \\%a, 92)]",
 			resultStr:   `[["%a","%a"]]`,
 		},
 		{
 			indexPos:    0,
 			exprStr:     `a LIKE "\\"`,
 			accessConds: "[like(test.t.a, \\, 92)]",
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, \\, 92)]",
 			resultStr:   "[[\"\\\\\",\"\\\\\"]]",
 		},
 		{
 			indexPos:    0,
 			exprStr:     `a LIKE "\\\\a%"`,
 			accessConds: `[like(test.t.a, \\a%, 92)]`,
-			filterConds: "[]",
+			filterConds: "[like(test.t.a, \\\\a%, 92)]",
 			resultStr:   "[[\"\\\\a\",\"\\\\b\")]",
 		},
 		{

diff --git a/tests/integrationtest/r/explain_generate_column_substitute.result b/tests/integrationtest/r/explain_generate_column_substitute.result
@@ -415,7 +415,8 @@ id	estRows	task	access object	operator info
 StreamAgg	1.00	root		funcs:count(Column#6)->Column#4
 └─IndexReader	1.00	root		index:StreamAgg
   └─StreamAgg	1.00	cop[tikv]		funcs:count(1)->Column#6
-    └─IndexRangeScan	250.00	cop[tikv]	table:tbl1, index:expression_index(md5(`s`))	range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
+    └─Selection	250.00	cop[tikv]		like(md5(cast(explain_generate_column_substitute.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92)
+      └─IndexRangeScan	250.00	cop[tikv]	table:tbl1, index:expression_index(md5(`s`))	range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
 select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%';
 count(*)
 64