From 04ddb6eb08988c54782bd808e110e7889b6ec170 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 20 Mar 2024 09:52:42 -0400 Subject: [PATCH] Refine documentation in common_subexpr_eliminate.rs --- .../optimizer/src/common_subexpr_eliminate.rs | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 33b6699a2d38..0f3a3874c553 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -41,21 +41,36 @@ use datafusion_expr::{col, Expr, ExprSchemable}; /// - DataType of this expression. type ExprSet = HashMap; -/// An ordered map of Identifiers encountered during visitation. +/// An ordered map of Identifiers assigned by `ExprIdentifierVisitor` in an +/// initial expression walk. /// -/// Is created in the ExprIdentifierVisitor, which identifies the common expressions. -/// Is consumed in the CommonSubexprRewriter, which performs mutations. +/// Used by `CommonSubexprRewriter`, which rewrites the expressions to remove +/// common subexpressions. /// -/// Vec idx is ordered by expr nodes visited on f_down. +/// Elements in this array are created on the walk down the expression tree +/// during `f_down`. Thus element 0 is the root of the expression tree. The +/// tuple contains: /// - series_number. -/// - Incr in fn_up, start from 1. -/// - the higher idx have the lower series_number. -/// - Identifier. -/// - is empty ("") if expr should not be considered for common elimation. +/// - Incremented during `f_up`, start from 1. +/// - Thus, items with higher idx have the lower series_number. +/// - [`Identifier`] +/// - Identifier of the expression. If empty (`""`), expr should not be considered for common elimination. +/// +/// # Example +/// An expression like `(a + b)` would have the following `IdArray`: +/// ```text +/// [ +/// (3, "a + b"), +/// (2, "a"), +/// (1, "b") +/// ] +/// ``` type IdArray = Vec<(usize, Identifier)>; -/// Identifier type. Current implementation use describe of an expression (type String) as -/// Identifier. +/// Identifier for each subexpression. +/// +/// Note that the current implementation uses the `Display` of an expression +/// (a `String`) as `Identifier`. /// /// An identifier should (ideally) be able to "hash", "accumulate", "equal" and "have no /// collision (as low as possible)"