Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: variable tidb_opt_enable_hash_join to skip hash join (#46575) #47359

Merged
merged 2 commits into from
Oct 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func (p *LogicalJoin) GetMergeJoin(prop *property.PhysicalProperty, schema *expr
// If the TiDB_SMJ hint exists, we should consider enforcing merge join,
// because we can't trust lhsChildProperty completely.
if (p.preferJoinType&preferMergeJoin) > 0 ||
(p.preferJoinType&preferNoHashJoin) > 0 { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
p.shouldSkipHashJoin() { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
joins = append(joins, p.getEnforcedMergeJoin(prop, schema, statsInfo)...)
}

Expand Down Expand Up @@ -388,6 +388,10 @@ var ForceUseOuterBuild4Test = atomic.NewBool(false)
// TODO: use hint and remove this variable
var ForcedHashLeftJoin4Test = atomic.NewBool(false)

// shouldSkipHashJoin reports whether hash join is disallowed for this join.
// It returns true when the preferNoHashJoin bit is set in p.preferJoinType,
// or when the session variable DisableHashJoin is true (that field is set
// from the tidb_opt_enable_hash_join system variable).
func (p *LogicalJoin) shouldSkipHashJoin() bool {
	// The join-level preference is checked first, so the session variables
	// are only consulted when that bit is not set.
	if p.preferJoinType&preferNoHashJoin > 0 {
		return true
	}
	return p.SCtx().GetSessionVars().DisableHashJoin
}

func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []PhysicalPlan, forced bool) {
if !prop.IsSortItemEmpty() { // hash join doesn't promise any orders
return
Expand Down Expand Up @@ -448,12 +452,12 @@ func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []Phy
}

forced = (p.preferJoinType&preferHashJoin > 0) || forceLeftToBuild || forceRightToBuild
noHashJoin := (p.preferJoinType & preferNoHashJoin) > 0
if !forced && noHashJoin {
if !forced && p.shouldSkipHashJoin() {
return nil, false
} else if forced && noHashJoin {
} else if forced && p.shouldSkipHashJoin() {
p.ctx.GetSessionVars().StmtCtx.AppendWarning(ErrInternal.GenWithStack(
"Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"))
"A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, " +
"or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."))
}
return
}
Expand Down
13 changes: 13 additions & 0 deletions planner/core/rule_join_reorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ func TestNoHashJoinHint(t *testing.T) {
runJoinReorderTestData(t, tk, "TestNoHashJoinHint")
}

// TestOptEnableHashJoin runs the "TestOptEnableHashJoin" join-reorder test
// data with the session variable tidb_opt_enable_hash_join set to off.
func TestOptEnableHashJoin(t *testing.T) {
	mockStore := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, mockStore)

	// Session setup: turn the variable off first, then create the four
	// tables the test cases query.
	setupStmts := []string{
		"use test",
		"set tidb_opt_enable_hash_join=off",
		"create table t1(a int, b int, key(a));",
		"create table t2(a int, b int, key(a));",
		"create table t3(a int, b int, key(a));",
		"create table t4(a int, b int, key(a));",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	runJoinReorderTestData(t, tk, "TestOptEnableHashJoin")
}

func TestNoMergeJoinHint(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
25 changes: 20 additions & 5 deletions planner/core/testdata/join_reorder_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@
"select /*+ no_hash_join(t2) */ * from t1 right join t2 on t1.a=t2.a"
]
},
{
"name": "TestOptEnableHashJoin",
"cases": [
"select * from t1, t2",
"select * from t1, t2 where t1.a=t2.a",
"select * from t1, t2 where t1.b=t2.b",
"select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"select * from t1 left join t2 on t1.b=t2.b",
"select * from t1 left join t2 on t1.a=t2.a",
"select * from t1 right join t2 on t1.b=t2.b",
"select * from t1 right join t2 on t1.a=t2.a",
"select /*+ hash_join(t1) */ * from t1, t2",
"select /*+ hash_join(t2) */ * from t1, t2"
]
},
{
"name": "TestNoIndexJoinHint",
"cases": [
Expand Down Expand Up @@ -509,11 +524,11 @@
{
"name": "TestAdditionOtherConditionsRemained4OuterJoin",
"cases": [
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;",
// The where clause should be a Selection out of joins.
"explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)"
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;",
// The where clause should be a Selection out of joins.
"explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)"
]
}
]
144 changes: 142 additions & 2 deletions planner/core/testdata/join_reorder_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand All @@ -622,7 +622,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand Down Expand Up @@ -747,6 +747,146 @@
}
]
},
{
"Name": "TestOptEnableHashJoin",
"Cases": [
{
"SQL": "select * from t1, t2",
"Plan": [
"MergeJoin 100000000.00 root inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root inner join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 9990.00 root test.t1.b",
" └─TableReader 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"Plan": [
"IndexHashJoin 12475.01 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), eq(test.t1.b, test.t2.b)",
"├─TableReader(Build) 9980.01 root data:Selection",
"│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12475.01 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 12475.01 cop[tikv] not(isnull(test.t2.b))",
" └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root left outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t1.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root left outer join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root right outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t1.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t2.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root right outer join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t1.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select /*+ hash_join(t1) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
"SQL": "select /*+ hash_join(t2) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
}
]
},
{
"Name": "TestNoIndexJoinHint",
"Cases": [
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,9 @@ type SessionVars struct {
// EnableOuterJoinReorder enables TiDB to involve the outer join into the join reorder.
EnableOuterJoinReorder bool

// DisableHashJoin indicates whether to disable hash join.
DisableHashJoin bool

// OptimizerEnableNAAJ enables TiDB to use null-aware anti join.
OptimizerEnableNAAJ bool

Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ var defaultSysVars = []*SysVar{
s.EnableOuterJoinReorder = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBOptEnableHashJoin, Value: BoolToOnOff(DefTiDBOptEnableHashJoin), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.DisableHashJoin = !TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBOptimizerEnableNAAJ, Value: BoolToOnOff(DefTiDBEnableNAAJ), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.OptimizerEnableNAAJ = TiDBOptOn(val)
return nil
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,9 @@ const (
// we'll choose a rather time-consuming algorithm to calculate the join order.
TiDBOptJoinReorderThreshold = "tidb_opt_join_reorder_threshold"

// TiDBOptEnableHashJoin indicates whether to enable hash join.
TiDBOptEnableHashJoin = "tidb_opt_enable_hash_join"

// TiDBSlowQueryFile indicates which slow query log file for SLOW_QUERY table to parse.
TiDBSlowQueryFile = "tidb_slow_query_file"

Expand Down Expand Up @@ -982,6 +985,7 @@ const (
DefTiDBOptimizerSelectivityLevel = 0
DefTiDBOptimizerEnableNewOFGB = false
DefTiDBEnableOuterJoinReorder = true
DefTiDBOptEnableHashJoin = true
DefTiDBEnableNAAJ = false
DefTiDBAllowBatchCop = 1
DefTiDBAllowMPPExecution = true
Expand Down