|
37 | 37 | import com.facebook.presto.sql.tree.Lateral;
|
38 | 38 | import com.facebook.presto.sql.tree.Node;
|
39 | 39 | import com.facebook.presto.sql.tree.NodeRef;
|
| 40 | +import com.facebook.presto.sql.tree.QualifiedName; |
40 | 41 | import com.facebook.presto.sql.tree.Query;
|
41 | 42 | import com.facebook.presto.sql.tree.QuerySpecification;
|
42 | 43 | import com.facebook.presto.sql.tree.Relation;
|
|
48 | 49 | import com.facebook.presto.sql.tree.Union;
|
49 | 50 | import com.facebook.presto.sql.tree.Unnest;
|
50 | 51 | import com.facebook.presto.sql.tree.Values;
|
| 52 | +import com.facebook.presto.sql.tree.With; |
| 53 | +import com.facebook.presto.sql.tree.WithQuery; |
51 | 54 | import com.google.common.collect.HashMultimap;
|
| 55 | +import com.google.common.collect.ImmutableList; |
52 | 56 | import com.google.common.collect.ImmutableSet;
|
53 | 57 |
|
54 | 58 | import java.util.HashMap;
|
|
57 | 61 | import java.util.Map.Entry;
|
58 | 62 | import java.util.Set;
|
59 | 63 |
|
| 64 | +import static com.google.common.collect.ImmutableList.toImmutableList; |
60 | 65 | import static com.google.common.collect.Sets.intersection;
|
61 | 66 | import static java.lang.String.format;
|
62 | 67 |
|
63 | 68 | /**
|
64 | 69 | * Finds all utilized columns in the query. Utilized columns are those that would have an "impact" on the query's results.
|
65 |
| - * |
| 70 | + * <p> |
66 | 71 | * For example, in the query:
|
67 |
| - * SELECT nationkey FROM (SELECT * FROM nation WHERE name = 'USA') |
| 72 | + * SELECT nationkey FROM (SELECT * FROM nation WHERE name = 'USA') |
68 | 73 | * Even though all the columns in table nation are referenced by the query (in the SELECT * part), only the columns
|
69 | 74 | * "name" and "nationkey" have an "impact" on the query's results.
|
70 |
| - * |
| 75 | + * <p> |
71 | 76 | * The high-level algorithm works as follows:
|
72 | 77 | * 1. Find all fields referenced in all clauses of the outermost SELECT query, and add them to an explore list.
|
73 | 78 | * 2. For each field reference F in the explore list, find its referenced relation R.
|
74 | 79 | * 3. If R is a SELECT query:
|
75 |
| - * a. Find the SELECT item expression that F references. Add all fields referenced by that expression to the explore list. |
76 |
| - * b. Add all fields referenced by every other clause of the SELECT query to the explore list. |
| 80 | + * a. Find the SELECT item expression that F references. Add all fields referenced by that expression to the explore list. |
| 81 | + * b. Add all fields referenced by every other clause of the SELECT query to the explore list. |
77 | 82 | * 4. Otherwise,
|
78 |
| - * a. Add F's referenced field to a referenced fields list. |
79 |
| - * b. For each child of R, find the corresponding child of F, and add it to the explore list. |
| 83 | + * a. Add F's referenced field to a referenced fields list. |
| 84 | + * b. For each child of R, find the corresponding child of F, and add it to the explore list. |
80 | 85 | * 5. Repeat from step 2 for all fields in the explore list, until all have been resolved to a base table relation.
|
81 |
| - * |
| 86 | + * <p> |
82 | 87 | * The referenced fields list at the end of this algorithm will contain all the columns referenced by the query, that impact the output.
|
83 | 88 | * Step 3a is where fields that do not impact the output are pruned.
|
84 | 89 | */
|
@@ -271,6 +276,23 @@ protected Void visitQuerySpecification(QuerySpecification querySpec, Context con
|
271 | 276 | return null;
|
272 | 277 | }
|
273 | 278 |
|
| 279 | + @Override |
| 280 | + protected Void visitWith(With node, Context context) |
| 281 | + { |
| 282 | + ImmutableList.copyOf(node.getQueries()).reverse().forEach(query -> process(query, context)); |
| 283 | + |
| 284 | + return null; |
| 285 | + } |
| 286 | + |
| 287 | + @Override |
| 288 | + protected Void visitWithQuery(WithQuery withQuery, Context context) |
| 289 | + { |
| 290 | + context.copyFieldIdsToExploreForWithQuery(withQuery); |
| 291 | + process(withQuery.getQuery(), context); |
| 292 | + |
| 293 | + return null; |
| 294 | + } |
| 295 | + |
274 | 296 | @Override
|
275 | 297 | protected Void visitSampledRelation(SampledRelation sampledRelation, Context context)
|
276 | 298 | {
|
@@ -493,5 +515,21 @@ private void addFieldIdToExplore(FieldId fieldId)
|
493 | 515 | {
|
494 | 516 | fieldsToExplore.put(fieldId.getRelationId(), fieldId);
|
495 | 517 | }
|
| 518 | + |
| 519 | + // Associate the relation from the with clause with the fieldIdsToExplore that we collected for it |
| 520 | + // when processing the main part of the query |
| 521 | + public void copyFieldIdsToExploreForWithQuery(WithQuery withQuery) |
| 522 | + { |
| 523 | + QualifiedName name = QualifiedName.of(withQuery.getName().getValue()); |
| 524 | + List<RelationId> relationIds = fieldsToExplore.keySet().stream() |
| 525 | + .filter(key -> key.getSourceNode() instanceof Table && ((Table) key.getSourceNode()).getName().equals(name)) |
| 526 | + .collect(toImmutableList()); |
| 527 | + if (relationIds.size() != 1) { |
| 528 | + throw new UnsupportedOperationException("Multiple relations with the same name are not supported by UtilizedColumnAnalyzer"); |
| 529 | + } |
| 530 | + fieldsToExplore.putAll( |
| 531 | + RelationId.of(withQuery.getQuery().getQueryBody()), |
| 532 | + fieldsToExplore.get(relationIds.get(0))); |
| 533 | + } |
496 | 534 | }
|
497 | 535 | }
|
0 commit comments