Skip to content

Commit

Permalink
Framework of Calcite Engine: Parser, Catalog Binding and Plan Convert…
Browse files Browse the repository at this point in the history
…er (#3249)

* First commit for Calcite integration

Signed-off-by: Lantao Jin <ltjin@amazon.com>

* disable java security manager in IT

Signed-off-by: Lantao Jin <ltjin@amazon.com>

---------

Signed-off-by: Lantao Jin <ltjin@amazon.com>
  • Loading branch information
LantaoJin authored Jan 17, 2025
1 parent e7be8ca commit 4b93d60
Show file tree
Hide file tree
Showing 90 changed files with 4,033 additions and 248 deletions.
13 changes: 13 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ allprojects {
resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib:1.9.10"
resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib-common:1.9.10"
resolutionStrategy.force "net.bytebuddy:byte-buddy:1.14.9"
resolutionStrategy.force "org.apache.httpcomponents.client5:httpclient5:5.3.1"
resolutionStrategy.force 'org.apache.httpcomponents.core5:httpcore5:5.2.5'
resolutionStrategy.force 'org.apache.httpcomponents.core5:httpcore5-h2:5.2.5'
resolutionStrategy.force 'com.fasterxml.jackson.core:jackson-annotations:2.17.2'
resolutionStrategy.force 'com.fasterxml.jackson:jackson-bom:2.17.2'
resolutionStrategy.force 'com.google.protobuf:protobuf-java:3.25.5'
resolutionStrategy.force 'org.locationtech.jts:jts-core:1.19.0'
resolutionStrategy.force 'com.google.errorprone:error_prone_annotations:2.28.0'
resolutionStrategy.force 'org.checkerframework:checker-qual:3.43.0'
resolutionStrategy.force 'org.apache.commons:commons-lang3:3.13.0'
resolutionStrategy.force 'org.apache.commons:commons-text:1.11.0'
resolutionStrategy.force 'commons-io:commons-io:2.15.0'
resolutionStrategy.force 'org.yaml:snakeyaml:2.2'
}
}

Expand Down
14 changes: 10 additions & 4 deletions core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,14 @@ dependencies {
api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}"
api group: 'com.google.code.gson', name: 'gson', version: '2.8.9'
api group: 'com.tdunning', name: 't-digest', version: '3.3'
api 'org.apache.calcite:calcite-core:1.38.0'
api 'org.apache.calcite:calcite-linq4j:1.38.0'
api project(':common')
implementation "com.github.seancfoley:ipaddress:5.4.2"

annotationProcessor('org.immutables:value:2.8.8')
compileOnly('org.immutables:value-annotations:2.8.8')

testImplementation('org.junit.jupiter:junit-jupiter:5.9.3')
testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1'
testImplementation group: 'org.mockito', name: 'mockito-core', version: '5.7.0'
Expand Down Expand Up @@ -113,22 +118,23 @@ jacocoTestCoverageVerification {
'org.opensearch.sql.utils.Constants',
'org.opensearch.sql.datasource.model.DataSource',
'org.opensearch.sql.datasource.model.DataSourceStatus',
'org.opensearch.sql.datasource.model.DataSourceType'
'org.opensearch.sql.datasource.model.DataSourceType',
'org.opensearch.sql.executor.ExecutionEngine'
]
limit {
counter = 'LINE'
minimum = 1.0
minimum = 0.5 // calcite dev only
}
limit {
counter = 'BRANCH'
minimum = 1.0
minimum = 0.5 // calcite dev only
}
}
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: it,
exclude: ['**/ast/**'])
exclude: ['**/ast/**', '**/calcite/**']) // calcite dev only
}))
}
}
Expand Down
36 changes: 25 additions & 11 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
Expand Down Expand Up @@ -143,6 +144,27 @@ public LogicalPlan analyze(UnresolvedPlan unresolved, AnalysisContext context) {
return unresolved.accept(this, context);
}

@Override
public LogicalPlan visitSubqueryAlias(SubqueryAlias node, AnalysisContext context) {
LogicalPlan child = analyze(node.getChild().get(0), context);
if (child instanceof LogicalRelation) {
// Put index name or its alias in index namespace on type environment so qualifier
// can be removed when analyzing qualified name. The value (expr type) here doesn't matter.
TypeEnvironment curEnv = context.peek();
curEnv.define(
new Symbol(
Namespace.INDEX_NAME,
(node.getAlias() == null)
? ((LogicalRelation) child).getRelationName()
: node.getAlias()),
STRUCT);
return child;
} else {
// TODO
throw new UnsupportedOperationException("SubqueryAlias is only supported in table alias");
}
}

@Override
public LogicalPlan visitRelation(Relation node, AnalysisContext context) {
QualifiedName qualifiedName = node.getTableQualifiedName();
Expand Down Expand Up @@ -170,12 +192,6 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) {
.getReservedFieldTypes()
.forEach((k, v) -> curEnv.define(new Symbol(Namespace.HIDDEN_FIELD_NAME, k), v));

// Put index name or its alias in index namespace on type environment so qualifier
// can be removed when analyzing qualified name. The value (expr type) here doesn't matter.
curEnv.define(
new Symbol(Namespace.INDEX_NAME, (node.getAlias() == null) ? tableName : node.getAlias()),
STRUCT);

return new LogicalRelation(tableName, table);
}

Expand Down Expand Up @@ -306,7 +322,7 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) {
for (UnresolvedExpression expr : node.getAggExprList()) {
NamedExpression aggExpr = namedExpressionAnalyzer.analyze(expr, context);
aggregatorBuilder.add(
new NamedAggregator(aggExpr.getNameOrAlias(), (Aggregator) aggExpr.getDelegated()));
new NamedAggregator(aggExpr.getName(), (Aggregator) aggExpr.getDelegated()));
}

ImmutableList.Builder<NamedExpression> groupbyBuilder = new ImmutableList.Builder<>();
Expand All @@ -331,8 +347,7 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) {
newEnv.define(
new Symbol(Namespace.FIELD_NAME, aggregator.getName()), aggregator.type()));
groupBys.forEach(
group ->
newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getNameOrAlias()), group.type()));
group -> newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getName()), group.type()));
return new LogicalAggregation(child, aggregators, groupBys);
}

Expand Down Expand Up @@ -425,8 +440,7 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) {
context.push();
TypeEnvironment newEnv = context.peek();
namedExpressions.forEach(
expr ->
newEnv.define(new Symbol(Namespace.FIELD_NAME, expr.getNameOrAlias()), expr.type()));
expr -> newEnv.define(new Symbol(Namespace.FIELD_NAME, expr.getName()), expr.type()));
List<NamedExpression> namedParseExpressions = context.getNamedParseExpressions();
return new LogicalProject(child, namedExpressions, namedParseExpressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ private Expression visitMetadata(
private Expression visitIdentifier(String ident, AnalysisContext context) {
// ParseExpression will always override ReferenceExpression when ident conflicts
for (NamedExpression expr : context.getNamedParseExpressions()) {
if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) {
if (expr.getName().equals(ident) && expr.getDelegated() instanceof ParseExpression) {
return expr.getDelegated();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public Void visitAggregation(LogicalAggregation plan, Void context) {
groupBy ->
expressionMap.put(
groupBy.getDelegated(),
new ReferenceExpression(groupBy.getNameOrAlias(), groupBy.type())));
new ReferenceExpression(groupBy.getName(), groupBy.type())));
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import lombok.RequiredArgsConstructor;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.NamedExpression;
Expand All @@ -28,18 +27,6 @@ public NamedExpression analyze(UnresolvedExpression expression, AnalysisContext

@Override
public NamedExpression visitAlias(Alias node, AnalysisContext context) {
return DSL.named(
unqualifiedNameIfFieldOnly(node, context),
node.getDelegated().accept(expressionAnalyzer, context),
node.getAlias());
}

private String unqualifiedNameIfFieldOnly(Alias node, AnalysisContext context) {
UnresolvedExpression selectItem = node.getDelegated();
if (selectItem instanceof QualifiedName) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
return qualifierAnalyzer.unqualified((QualifiedName) selectItem);
}
return node.getName();
return DSL.named(node.getName(), node.getDelegated().accept(expressionAnalyzer, context));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
}

Expression expr = referenceIfSymbolDefined(node, context);
return Collections.singletonList(
DSL.named(unqualifiedNameIfFieldOnly(node, context), expr, node.getAlias()));
return Collections.singletonList(DSL.named(node.getName(), expr));
}

/**
Expand All @@ -77,7 +76,7 @@ public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
* aggExpr)) Agg(Alias("AVG(age)", aggExpr))
* <li>SELECT length(name), AVG(age) FROM s BY length(name) Project(Alias("name", expr),
* Alias("AVG(age)", aggExpr)) Agg(Alias("AVG(age)", aggExpr))
* <li>SELECT length(name) as l, AVG(age) FROM s BY l Project(Alias("name", expr, l),
* <li>SELECT length(name) as l, AVG(age) FROM s BY l Project(Alias("l", expr),
* Alias("AVG(age)", aggExpr)) Agg(Alias("AVG(age)", aggExpr), Alias("length(name)",
* groupExpr))
* </ol>
Expand All @@ -89,7 +88,9 @@ private Expression referenceIfSymbolDefined(Alias expr, AnalysisContext context)
// (OVER clause) and thus depends on name in alias to be replaced correctly
return optimizer.optimize(
DSL.named(
expr.getName(), delegatedExpr.accept(expressionAnalyzer, context), expr.getAlias()),
delegatedExpr.toString(),
delegatedExpr.accept(expressionAnalyzer, context),
expr.getName()),
context);
}

Expand Down Expand Up @@ -128,21 +129,4 @@ public List<NamedExpression> visitNestedAllTupleFields(
})
.collect(Collectors.toList());
}

/**
* Get unqualified name if select item is just a field. For example, suppose an index named
* "accounts", return "age" for "SELECT accounts.age". But do nothing for expression in "SELECT
* ABS(accounts.age)". Note that an assumption is made implicitly that original name field in
* Alias must be the same as the values in QualifiedName. This is true because AST builder does
* this. Otherwise, what unqualified() returns will override Alias's name as NamedExpression's
* name even though the QualifiedName doesn't have qualifier.
*/
private String unqualifiedNameIfFieldOnly(Alias node, AnalysisContext context) {
UnresolvedExpression selectItem = node.getDelegated();
if (selectItem instanceof QualifiedName) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
return qualifierAnalyzer.unqualified((QualifiedName) selectItem);
}
return node.getName();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ public LogicalPlan visitAlias(Alias node, AnalysisContext context) {
List<Pair<SortOption, Expression>> sortList = analyzeSortList(unresolved, context);

WindowDefinition windowDefinition = new WindowDefinition(partitionByList, sortList);
NamedExpression namedWindowFunction =
new NamedExpression(node.getName(), windowFunction, node.getAlias());
NamedExpression namedWindowFunction = new NamedExpression(node.getName(), windowFunction);
List<Pair<SortOption, Expression>> allSortItems = windowDefinition.getAllSortItems();

if (allSortItems.isEmpty()) {
Expand Down
15 changes: 15 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Join;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
Expand All @@ -59,6 +61,7 @@
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.Values;
Expand Down Expand Up @@ -326,4 +329,16 @@ public T visitCloseCursor(CloseCursor closeCursor, C context) {
public T visitFillNull(FillNull fillNull, C context) {
return visitChildren(fillNull, context);
}

public T visitJoin(Join node, C context) {
return visitChildren(node, context);
}

public T visitLookup(Lookup node, C context) {
return visitChildren(node, context);
}

public T visitSubqueryAlias(SubqueryAlias node, C context) {
return visitChildren(node, context);
}
}
19 changes: 17 additions & 2 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.ast.tree.Aggregation;
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.DescribeRelation;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
Expand All @@ -62,6 +63,7 @@
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
Expand Down Expand Up @@ -89,7 +91,15 @@ public UnresolvedPlan relation(QualifiedName tableName) {
}

public UnresolvedPlan relation(String tableName, String alias) {
return new Relation(qualifiedName(tableName), alias);
return new SubqueryAlias(alias, new Relation(qualifiedName(tableName)));
}

public UnresolvedPlan describe(String tableName) {
return new DescribeRelation(qualifiedName(tableName));
}

public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) {
return new SubqueryAlias(child, alias);
}

public UnresolvedPlan tableFunction(List<String> functionName, UnresolvedExpression... args) {
Expand Down Expand Up @@ -385,8 +395,13 @@ public Alias alias(String name, UnresolvedExpression expr) {
return new Alias(name, expr);
}

@Deprecated
public Alias alias(String name, UnresolvedExpression expr, String alias) {
return new Alias(name, expr, alias);
if (alias == null) {
return new Alias(name, expr);
} else {
return new Alias(alias, expr);
}
}

public NestedAllTupleFields nestedAllTupleFields(String path) {
Expand Down
15 changes: 5 additions & 10 deletions core/src/main/java/org/opensearch/sql/ast/expression/Alias.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,30 @@

package org.opensearch.sql.ast.expression;

import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;

/**
* Alias abstraction that associate an unnamed expression with a name and an optional alias. The
* name and alias information preserved is useful for semantic analysis and response formatting
* eventually. This can avoid restoring the info in toString() method which is inaccurate because
* original info is already lost.
* Alias abstraction that associate an unnamed expression with a name. The name information
* preserved is useful for semantic analysis and response formatting eventually. This can avoid
* restoring the info in toString() method which is inaccurate because original info is already
* lost.
*/
@AllArgsConstructor
@EqualsAndHashCode(callSuper = false)
@Getter
@RequiredArgsConstructor
@ToString
public class Alias extends UnresolvedExpression {

/** Original field name. */
/** The name to be associated with the result of computing delegated expression. */
private final String name;

/** Expression aliased. */
private final UnresolvedExpression delegated;

/** Optional field alias. */
private String alias;

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitAlias(this, context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import java.util.Collections;
import lombok.EqualsAndHashCode;
import lombok.ToString;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/** Extend Relation to describe the table itself */
@ToString
@EqualsAndHashCode(callSuper = false)
public class DescribeRelation extends Relation {
public DescribeRelation(UnresolvedExpression tableName) {
super(Collections.singletonList(tableName));
}
}
Loading

0 comments on commit 4b93d60

Please sign in to comment.