Skip to content

Commit

Permalink
Merge upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyum committed Aug 28, 2020
1 parent d41df58 commit 6b2a2da
Show file tree
Hide file tree
Showing 102 changed files with 1,030 additions and 938 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
override protected val excludedOnceBatches: Set[String] =
Set(
"PartitionPruning",
"Rewrite Subquery",
"Extract Python UDFs")

protected def fixedPoint =
Expand Down Expand Up @@ -214,7 +215,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
RewritePredicateSubquery,
ColumnPruning,
InferFiltersFromConstraints,
PushDownPredicate,
PushDownPredicates,
CollapseProject,
CombineFilters,
PruneFilters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class RewriteSubquerySuite extends PlanTest {
RewritePredicateSubquery,
ColumnPruning,
InferFiltersFromConstraints,
PushDownPredicate,
PushDownPredicates,
CollapseProject,
CombineFilters,
RemoveNoopOperators) :: Nil
Expand Down Expand Up @@ -80,6 +80,7 @@ class RewriteSubquerySuite extends PlanTest {
.where(IsNotNull('a)).select('a)
.join(relInSubquery.where(IsNotNull('x) && IsNotNull('y) && 'y > 1).select('x),
LeftSemi, Some('a === 'x))
}

test("NOT-IN subquery nested inside OR") {
val relation1 = LocalRelation('a.int, 'b.int)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ TakeOrderedAndProject (58)
Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer]
PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)]
PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_customer_sk)]
ReadSchema: struct<c_customer_sk:int,c_current_cdemo_sk:int,c_current_addr_sk:int>

(2) ColumnarToRow [codegen id : 1]
Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]

(3) Filter [codegen id : 1]
Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4))
Condition : ((isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) AND isnotnull(c_customer_sk#3))

(4) Exchange
Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Expand All @@ -85,29 +85,29 @@ Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0
Output [2]: [ss_sold_date_sk#7, ss_customer_sk#8]
Batched: true
Location [not included in comparison]/{warehouse_dir}/store_sales]
PushedFilters: [IsNotNull(ss_sold_date_sk)]
PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)]
ReadSchema: struct<ss_sold_date_sk:int,ss_customer_sk:int>

(7) ColumnarToRow [codegen id : 4]
Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8]

(8) Filter [codegen id : 4]
Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8]
Condition : isnotnull(ss_sold_date_sk#7)
Condition : (isnotnull(ss_sold_date_sk#7) AND isnotnull(ss_customer_sk#8))

(9) Scan parquet default.date_dim
Output [3]: [d_date_sk#9, d_year#10, d_moy#11]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)]
PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)]
ReadSchema: struct<d_date_sk:int,d_year:int,d_moy:int>

(10) ColumnarToRow [codegen id : 3]
Input [3]: [d_date_sk#9, d_year#10, d_moy#11]

(11) Filter [codegen id : 3]
Input [3]: [d_date_sk#9, d_year#10, d_moy#11]
Condition : (((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 2002)) AND (d_moy#11 >= 1)) AND (d_moy#11 <= 4)) AND isnotnull(d_date_sk#9))
Condition : (((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_year#10 = 2002)) AND (d_moy#11 >= 1)) AND (d_moy#11 <= 4)) AND isnotnull(d_date_sk#9))

(12) Project [codegen id : 3]
Output [1]: [d_date_sk#9]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep
InputAdapter
Exchange [c_customer_sk] #3
WholeStageCodegen (1)
Filter [c_current_addr_sk,c_current_cdemo_sk]
Filter [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk]
Expand All @@ -37,7 +37,7 @@ TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep
WholeStageCodegen (4)
Project [ss_customer_sk]
BroadcastHashJoin [d_date_sk,ss_sold_date_sk]
Filter [ss_sold_date_sk]
Filter [ss_customer_sk,ss_sold_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,29 +55,29 @@ TakeOrderedAndProject (50)
Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer]
PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)]
PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_customer_sk)]
ReadSchema: struct<c_customer_sk:int,c_current_cdemo_sk:int,c_current_addr_sk:int>

(2) ColumnarToRow [codegen id : 9]
Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]

(3) Filter [codegen id : 9]
Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4))
Condition : ((isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) AND isnotnull(c_customer_sk#3))

(4) Scan parquet default.store_sales
Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7]
Batched: true
Location [not included in comparison]/{warehouse_dir}/store_sales]
PushedFilters: [IsNotNull(ss_sold_date_sk)]
PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)]
ReadSchema: struct<ss_sold_date_sk:int,ss_customer_sk:int>

(5) ColumnarToRow [codegen id : 2]
Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7]

(6) Filter [codegen id : 2]
Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7]
Condition : isnotnull(ss_sold_date_sk#6)
Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_customer_sk#7))

(7) Scan parquet default.date_dim
Output [3]: [d_date_sk#8, d_year#9, d_moy#10]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep
BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk]
BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk]
BroadcastHashJoin [c_customer_sk,ss_customer_sk]
Filter [c_current_addr_sk,c_current_cdemo_sk]
Filter [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk]
Expand All @@ -23,7 +23,7 @@ TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep
WholeStageCodegen (2)
Project [ss_customer_sk]
BroadcastHashJoin [d_date_sk,ss_sold_date_sk]
Filter [ss_sold_date_sk]
Filter [ss_customer_sk,ss_sold_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,15 @@ Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0
Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)]
PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_item_sk)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(7) ColumnarToRow [codegen id : 17]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]

(8) Filter [codegen id : 17]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Condition : ((isnotnull(i_class_id#8) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9))
Condition : (((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_item_sk#6))

(9) Scan parquet default.store_sales
Output [2]: [ss_sold_date_sk#1, ss_item_sk#2]
Expand Down Expand Up @@ -217,15 +217,15 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10]
Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(20) ColumnarToRow [codegen id : 4]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]

(21) Filter [codegen id : 4]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8))
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9))

(22) BroadcastExchange
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sa
WholeStageCodegen (17)
Project [i_item_sk]
BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1))
Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)]
PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_item_sk)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(5) ColumnarToRow [codegen id : 11]
Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8]

(6) Filter [codegen id : 11]
Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8]
Condition : ((isnotnull(i_class_id#7) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8))
Condition : (((isnotnull(i_class_id#7) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8)) AND isnotnull(i_item_sk#5))

(7) Scan parquet default.store_sales
Output [2]: [ss_sold_date_sk#1, ss_item_sk#2]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sa
WholeStageCodegen (11)
Project [i_item_sk]
BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,15 @@ Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0
Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)]
PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_item_sk)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(7) ColumnarToRow [codegen id : 17]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]

(8) Filter [codegen id : 17]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Condition : ((isnotnull(i_class_id#8) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9))
Condition : (((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_item_sk#6))

(9) Scan parquet default.store_sales
Output [2]: [ss_sold_date_sk#1, ss_item_sk#2]
Expand Down Expand Up @@ -196,15 +196,15 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10]
Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(20) ColumnarToRow [codegen id : 4]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]

(21) Filter [codegen id : 4]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8))
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7))

(22) BroadcastExchange
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Expand Down Expand Up @@ -437,15 +437,15 @@ Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_d
Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)]
PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)]
ReadSchema: struct<i_item_sk:int,i_brand_id:int,i_class_id:int,i_category_id:int>

(73) ColumnarToRow [codegen id : 20]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]

(74) Filter [codegen id : 20]
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8))
Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7))

(75) Exchange
Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9]
Expand Down Expand Up @@ -766,15 +766,15 @@ Subquery:2 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquer
Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)]
PushedFilters: [IsNotNull(d_dom), IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)]
ReadSchema: struct<d_week_seq:int,d_year:int,d_moy:int,d_dom:int>

(138) ColumnarToRow [codegen id : 1]
Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]

(139) Filter [codegen id : 1]
Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]
Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#11 = 2000)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11))
Condition : (((((isnotnull(d_dom#90) AND isnotnull(d_moy#89)) AND isnotnull(d_year#11)) AND (d_year#11 = 2000)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11))

(140) Project [codegen id : 1]
Output [1]: [d_week_seq#29]
Expand All @@ -793,15 +793,15 @@ Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquer
Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)]
PushedFilters: [IsNotNull(d_dom), IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)]
ReadSchema: struct<d_week_seq:int,d_year:int,d_moy:int,d_dom:int>

(142) ColumnarToRow [codegen id : 1]
Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]

(143) Filter [codegen id : 1]
Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90]
Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#11 = 1999)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11))
Condition : (((((isnotnull(d_dom#90) AND isnotnull(d_moy#89)) AND isnotnull(d_year#11)) AND (d_year#11 = 1999)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11))

(144) Project [codegen id : 1]
Output [1]: [d_week_seq#29]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_cat
WholeStageCodegen (17)
Project [i_item_sk]
BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id]
Filter [i_brand_id,i_category_id,i_class_id,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk]
Expand Down
Loading

0 comments on commit 6b2a2da

Please sign in to comment.