Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Lucene query pushdown optimization #671

Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ public PhysicalPlan implement(LogicalPlan plan) {
return plan.accept(new DefaultImplementor<ElasticsearchIndexScan>() {
@Override
public PhysicalPlan visitFilter(LogicalFilter node, ElasticsearchIndexScan context) {
// For now (without optimizer), only push down filter close to relation
if (!(node.getChild().get(0) instanceof LogicalRelation)) {
return super.visitFilter(node, context);
}

FilterQueryBuilder queryBuilder =
new FilterQueryBuilder(new DefaultExpressionSerializer());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage;

import static org.elasticsearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME;
import static org.elasticsearch.search.sort.SortOrder.ASC;

import com.amazon.opendistroforelasticsearch.sql.common.setting.Settings;
import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.client.ElasticsearchClient;
Expand Down Expand Up @@ -102,14 +105,18 @@ public void pushDown(QueryBuilder query) {
if (current == null) {
source.query(query);
} else {
if (isBoolMustQuery(current)) {
((BoolQueryBuilder) current).must(query);
if (isBoolFilterQuery(current)) {
((BoolQueryBuilder) current).filter(query);
} else {
source.query(QueryBuilders.boolQuery()
.must(current)
.must(query));
.filter(current)
.filter(query));
}
}

if (source.sorts() == null) {
source.sort(DOC_FIELD_NAME, ASC); // Make sure consistent order
}
}

@Override
Expand All @@ -119,9 +126,9 @@ public void close() {
client.cleanup(request);
}

private boolean isBoolMustQuery(QueryBuilder current) {
private boolean isBoolFilterQuery(QueryBuilder current) {
return (current instanceof BoolQueryBuilder)
&& !((BoolQueryBuilder) current).must().isEmpty();
&& !((BoolQueryBuilder) current).filter().isEmpty();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,19 @@
import static java.util.Collections.emptyMap;
import static org.elasticsearch.script.Script.DEFAULT_SCRIPT_TYPE;

import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene.LuceneQuery;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene.RangeQuery;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene.RangeQuery.Comparison;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene.TermQuery;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene.WildcardQuery;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.serialization.ExpressionSerializer;
import com.amazon.opendistroforelasticsearch.sql.expression.Expression;
import com.amazon.opendistroforelasticsearch.sql.expression.ExpressionNodeVisitor;
import com.amazon.opendistroforelasticsearch.sql.expression.FunctionExpression;
import com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName;
import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionName;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import java.util.function.BiFunction;
import lombok.RequiredArgsConstructor;
import org.elasticsearch.index.query.BoolQueryBuilder;
Expand All @@ -40,6 +49,19 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor<QueryBuilder, Obje
*/
private final ExpressionSerializer serializer;

/**
* Mapping from function name to lucene query builder.
*/
private final Map<FunctionName, LuceneQuery> luceneQueries =
ImmutableMap.<FunctionName, LuceneQuery>builder()
.put(BuiltinFunctionName.EQUAL.getName(), new TermQuery())
.put(BuiltinFunctionName.LESS.getName(), new RangeQuery(Comparison.LT))
.put(BuiltinFunctionName.GREATER.getName(), new RangeQuery(Comparison.GT))
.put(BuiltinFunctionName.LTE.getName(), new RangeQuery(Comparison.LTE))
.put(BuiltinFunctionName.GTE.getName(), new RangeQuery(Comparison.GTE))
.put(BuiltinFunctionName.LIKE.getName(), new WildcardQuery())
.build();

/**
* Build Elasticsearch filter query from expression.
* @param expr expression
Expand All @@ -55,16 +77,22 @@ public QueryBuilder build(Expression expr) {
}

@Override
public QueryBuilder visitFunction(FunctionExpression node, Object context) {
switch (node.getFunctionName().getFunctionName()) {
public QueryBuilder visitFunction(FunctionExpression func, Object context) {
FunctionName name = func.getFunctionName();
switch (name.getFunctionName()) {
case "and":
return buildBoolQuery(node, context, BoolQueryBuilder::must);
return buildBoolQuery(func, context, BoolQueryBuilder::filter);
case "or":
return buildBoolQuery(node, context, BoolQueryBuilder::should);
return buildBoolQuery(func, context, BoolQueryBuilder::should);
case "not":
return buildBoolQuery(node, context, BoolQueryBuilder::mustNot);
default:
return buildScriptQuery(node);
return buildBoolQuery(func, context, BoolQueryBuilder::mustNot);
default: {
LuceneQuery query = luceneQueries.get(name);
if (query != null && query.canSupport(func)) {
return query.build(func);
}
return buildScriptQuery(func);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import com.amazon.opendistroforelasticsearch.sql.expression.FunctionExpression;
import com.amazon.opendistroforelasticsearch.sql.expression.LiteralExpression;
import com.amazon.opendistroforelasticsearch.sql.expression.ReferenceExpression;
import org.elasticsearch.index.query.QueryBuilder;

/**
* Lucene query abstraction that builds Lucene query from function expression.
*/
public abstract class LuceneQuery {

/**
* Check if function expression supported by current Lucene query.
* Default behavior is that report supported if:
* 1. Left is a reference
* 2. Right side is a literal
*
* @param func function
* @return return true if supported, otherwise false.
*/
public boolean canSupport(FunctionExpression func) {
return (func.getArguments().size() == 2)
&& (func.getArguments().get(0) instanceof ReferenceExpression)
&& (func.getArguments().get(1) instanceof LiteralExpression);
}

/**
* Build Lucene query from function expression.
*
* @param func function
* @return query
*/
public QueryBuilder build(FunctionExpression func) {
ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0);
LiteralExpression literal = (LiteralExpression) func.getArguments().get(1);
return doBuild(ref.getAttr(), ref.type(), literal.valueOf(null));
}

/**
* Build method that subclass implements by default which is to build query
* from reference and literal in function arguments.
*
* @param fieldName field name
* @param fieldType expr fieldType
* @param literal expr literal
* @return query
*/
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
throw new UnsupportedOperationException(
"Subclass doesn't implement this and build method either");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import lombok.RequiredArgsConstructor;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;

/**
* Lucene query that builds range query for non-quality comparison.
*/
@RequiredArgsConstructor
public class RangeQuery extends LuceneQuery {

public enum Comparison {
LT, GT, LTE, GTE, BETWEEN
}

/**
* Comparison that range query build for.
*/
private final Comparison comparison;

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
Object value = literal.value();

RangeQueryBuilder query = QueryBuilders.rangeQuery(fieldName);
switch (comparison) {
case LT:
return query.lt(value);
case GT:
return query.gt(value);
case LTE:
return query.lte(value);
case GTE:
return query.gte(value);
default:
throw new IllegalStateException("Comparison is supported by range query: " + comparison);
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import static com.amazon.opendistroforelasticsearch.sql.elasticsearch.data.type.ElasticsearchDataType.ES_TEXT_KEYWORD;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

/**
* Lucene query that build term query for equality comparison.
*/
public class TermQuery extends LuceneQuery {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
if (fieldType == ES_TEXT_KEYWORD) { // Assume inner field name is always "keyword"
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
fieldName += ".keyword";
}
return QueryBuilders.termQuery(fieldName, literal.value());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

/**
* Lucene query that builds wildcard query.
*/
public class WildcardQuery extends LuceneQuery {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
String matchText = convertSqlWildcardToLucene(literal.stringValue());
return QueryBuilders.wildcardQuery(fieldName, matchText);
}

private String convertSqlWildcardToLucene(String text) {
return text.replace('%', '*')
.replace('_', '?');
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage;

import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRING;
import static org.elasticsearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME;
import static org.elasticsearch.search.sort.SortOrder.ASC;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand Down Expand Up @@ -108,14 +110,14 @@ void pushDownFilters() {
.pushDown(QueryBuilders.termQuery("age", 30))
.shouldQuery(
QueryBuilders.boolQuery()
.must(QueryBuilders.termQuery("name", "John"))
.must(QueryBuilders.termQuery("age", 30)))
.filter(QueryBuilders.termQuery("name", "John"))
.filter(QueryBuilders.termQuery("age", 30)))
.pushDown(QueryBuilders.rangeQuery("balance").gte(10000))
.shouldQuery(
QueryBuilders.boolQuery()
.must(QueryBuilders.termQuery("name", "John"))
.must(QueryBuilders.termQuery("age", 30))
.must(QueryBuilders.rangeQuery("balance").gte(10000)));
.filter(QueryBuilders.termQuery("name", "John"))
.filter(QueryBuilders.termQuery("age", 30))
.filter(QueryBuilders.rangeQuery("balance").gte(10000)));
}

private PushDownAssertion assertThat() {
Expand Down Expand Up @@ -143,7 +145,9 @@ PushDownAssertion pushDown(QueryBuilder query) {

PushDownAssertion shouldQuery(QueryBuilder expected) {
ElasticsearchRequest request = new ElasticsearchQueryRequest("test", 200);
request.getSourceBuilder().query(expected);
request.getSourceBuilder()
.query(expected)
.sort(DOC_FIELD_NAME, ASC);
when(client.search(request)).thenReturn(response);
indexScan.open();
return this;
Expand Down
Loading