Fix check_access_control_on_utilized_columns_only for CTE

rschlussel · rschlussel · commit facc558fda76 · 2025-02-27T19:30:44.000-05:00
Previously we weren't associating a WITH query (CTE) with the columns
used from it in the main part of the query, so we wouldn't consider any
of the CTE's columns as used.  When the usage within the CTE or outside
of it was just an identifier, we would have the utilized column collected
anyway, because the column collected in the main query kept the source
table information. However, if there was an expression in between, we
would lose that information, and we wouldn't check access control for
the column that was used in an expression in the CTE.

Example:

For the following query
```
with cte as (select  x as c1, z + 1 as c2 from t13) select c1, c2 from (select * from cte)
```

Previously we would only check access permissions on t13.x, but not
t13.z.  With this change we will check column access on both t13.x and
t13.z.
diff --git a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/RelationId.java b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/RelationId.java
@@ -53,6 +53,10 @@ public boolean isAnonymous()
         return sourceNode == null;
     }
 
+    public Node getSourceNode()
+    {
+        return sourceNode; // null when this relation is anonymous (see isAnonymous())
+    }
     @Override
     public boolean equals(Object o)
     {
diff --git a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/UtilizedColumnsAnalyzer.java b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/UtilizedColumnsAnalyzer.java
@@ -37,6 +37,7 @@
 import com.facebook.presto.sql.tree.Lateral;
 import com.facebook.presto.sql.tree.Node;
 import com.facebook.presto.sql.tree.NodeRef;
+import com.facebook.presto.sql.tree.QualifiedName;
 import com.facebook.presto.sql.tree.Query;
 import com.facebook.presto.sql.tree.QuerySpecification;
 import com.facebook.presto.sql.tree.Relation;
@@ -48,7 +49,10 @@
 import com.facebook.presto.sql.tree.Union;
 import com.facebook.presto.sql.tree.Unnest;
 import com.facebook.presto.sql.tree.Values;
+import com.facebook.presto.sql.tree.With;
+import com.facebook.presto.sql.tree.WithQuery;
 import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 
 import java.util.HashMap;
@@ -57,28 +61,29 @@
 import java.util.Map.Entry;
 import java.util.Set;
 
+import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.collect.Sets.intersection;
 import static java.lang.String.format;
 
 /**
  * Finds all utilized columns in the query. Utilized columns are those that would have an "impact" on the query's results.
- *
+ * <p>
  * For example, in the query:
- *     SELECT nationkey FROM (SELECT * FROM nation WHERE name = 'USA')
+ * SELECT nationkey FROM (SELECT * FROM nation WHERE name = 'USA')
  * Even though all the columns in table nation are referenced by the query (in the SELECT * part), only the columns
  * "name" and "nationkey" have an "impact" on the query's results.
- *
+ * <p>
  * The high-level algorithm works as follows:
  * 1. Find all fields referenced in all clauses of the outermost SELECT query, and add them to an explore list.
  * 2. For each field reference F in the explore list, find its referenced relation R.
  * 3. If R is a SELECT query:
- *    a. Find the SELECT item expression that F references. Add all fields referenced by that expression to the explore list.
- *    b. Add all fields referenced by every other clause of the SELECT query to the explore list.
+ * a. Find the SELECT item expression that F references. Add all fields referenced by that expression to the explore list.
+ * b. Add all fields referenced by every other clause of the SELECT query to the explore list.
  * 4. Otherwise,
- *    a. Add F's referenced field to a referenced fields list.
- *    b. For each child of R, find the corresponding child of F, and add it to the explore list.
+ * a. Add F's referenced field to a referenced fields list.
+ * b. For each child of R, find the corresponding child of F, and add it to the explore list.
  * 5. Repeat from step 2 for all fields in the explore list, until all have been resolved to a base table relation.
- *
+ * <p>
  * The referenced fields list at the end of this algorithm will contain all the columns referenced by the query, that impact the output.
  * Step 3a is where fields that do not impact the output are pruned.
  */
@@ -271,6 +276,23 @@ protected Void visitQuerySpecification(QuerySpecification querySpec, Context con
             return null;
         }
 
+        @Override
+        protected Void visitWith(With node, Context context)
+        {
+            ImmutableList.copyOf(node.getQueries()).reverse().forEach(query -> process(query, context)); // last-declared WITH query first; presumably so a later query's use of an earlier one is collected before the earlier one is processed - confirm
+
+            return null;
+        }
+
+        @Override
+        protected Void visitWithQuery(WithQuery withQuery, Context context)
+        {
+            context.copyFieldIdsToExploreForWithQuery(withQuery); // copy the fields collected under this WITH query's name onto its body's relation before processing the body
+            process(withQuery.getQuery(), context);
+
+            return null;
+        }
+
         @Override
         protected Void visitSampledRelation(SampledRelation sampledRelation, Context context)
         {
@@ -493,5 +515,21 @@ private void addFieldIdToExplore(FieldId fieldId)
         {
             fieldsToExplore.put(fieldId.getRelationId(), fieldId);
         }
+
+        // Associate the relation from the with clause with the fieldIdsToExplore that we collected for it
+        // when processing the main part of the query
+        public void copyFieldIdsToExploreForWithQuery(WithQuery withQuery)
+        {
+            QualifiedName name = QualifiedName.of(withQuery.getName().getValue());
+            List<RelationId> relationIds = fieldsToExplore.keySet().stream()
+                    .filter(key -> key.getSourceNode() instanceof Table && ((Table) key.getSourceNode()).getName().equals(name))
+                    .collect(toImmutableList());
+            if (relationIds.size() != 1) {
+                throw new UnsupportedOperationException("Multiple relations with the same name are not supported by UtilizedColumnAnalyzer");
+            }
+            fieldsToExplore.putAll(
+                    RelationId.of(withQuery.getQuery().getQueryBody()),
+                    fieldsToExplore.get(relationIds.get(0)));
+        }
     }
 }
diff --git a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestUtilizedColumnsAnalyzer.java b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestUtilizedColumnsAnalyzer.java
@@ -519,6 +519,33 @@ public void testUDF()
                 ImmutableMap.of(QualifiedObjectName.valueOf("tpch.s1.t1"), ImmutableSet.of("a")));
     }
 
+    @Test
+    public void testCteWithExpressionInSelect()
+    {
+        assertUtilizedTableColumns(
+                "with cte as (select  x as c1, y as c2, z + 1 as c3 from t13) select c1, c3 from (select * from cte)",
+                ImmutableMap.of(QualifiedObjectName.valueOf("tpch.s1.t13"), ImmutableSet.of("x", "z"))); // y only feeds c2, which the outer query does not select
+    }
+
+    @Test
+    public void testMultipleCtes()
+    {
+        assertUtilizedTableColumns(
+                "with cte1 as (select  x as c1, y as c2, z + 1 as c3 from t13), cte2 as (select c1 + 1 as a, c3 as b from cte1) select a, b from (select * from cte2)",
+                ImmutableMap.of(QualifiedObjectName.valueOf("tpch.s1.t13"), ImmutableSet.of("x", "z"))); // a comes from c1 (x), b from c3 (z + 1); c2 (y) is never used
+        assertUtilizedTableColumns(
+                "with cte1 as (select  x as c1, z + 1 as c2, y from t13), cte2 as (select c1 + 1 c3, c2 +1 as c4 from cte1) select c3 from cte2",
+                ImmutableMap.of(QualifiedObjectName.valueOf("tpch.s1.t13"), ImmutableSet.of("x"))); // only c3 (c1 + 1, i.e. x) is selected; c4 and y are not
+    }
+
+    @Test
+    public void testMultipleCtesWithSameNameFallsBackToAllColumns()
+    {
+        assertUtilizedTableColumns(
+                "with cte1 as (select  x + 1 as c1, y as c2, z + 1 as c3 from t13), cte2 as (with cte1 AS (select y +1 as c1 from t13) select * from cte1) SELECT cte1.c1, cte2.c1 from cte1 join cte2 on cte1.c1=cte2.c1",
+                ImmutableMap.of(QualifiedObjectName.valueOf("tpch.s1.t13"), ImmutableSet.of("x", "y", "z"))); // cte1 is declared at the top level and again inside cte2; per the test name the analysis falls back to all columns
+    }
+
     private void assertUtilizedTableColumns(@Language("SQL") String query, Map<QualifiedObjectName, Set<String>> expected)
     {
         transaction(transactionManager, accessControl)

Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,10 @@ public boolean isAnonymous()`
`53`	`53`	`return sourceNode == null;`
`54`	`54`	`}`
`55`	`55`
	`56`	`+ public Node getSourceNode()`
	`57`	`+ {`
	`58`	`+ return sourceNode;`
	`59`	`+ }`
`56`	`60`	`@Override`
`57`	`61`	`public boolean equals(Object o)`
`58`	`62`	`{`