Skip to content

Commit

Permalink
Introduce ApproximateRangeQuery and ApproximateQuery (#13788)
Browse files Browse the repository at this point in the history
This introduces a basic "approximation" framework that improves
query performance by modifying the query in a way that should be
functionally equivalent.

To start, we can reduce the bounds of a range query in order to
satisfy the `track_total_hits` value (which defaults to 10,000).

---------

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>
Signed-off-by: Michael Froh <froh@amazon.com>
Co-authored-by: Michael Froh <froh@amazon.com>
  • Loading branch information
harshavamsi and msfroh authored Sep 2, 2024
1 parent 738cdd3 commit 2e9db40
Show file tree
Hide file tree
Showing 20 changed files with 1,622 additions and 58 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290))
- Add fieldType to AbstractQueryBuilder and FieldSortBuilder ([#15328](https://github.com/opensearch-project/OpenSearch/pull/15328))
- [Reader Writer Separation] Add searchOnly replica routing configuration ([#15410](https://github.com/opensearch-project/OpenSearch/pull/15410))
- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788))
- [Workload Management] Add query group level failure tracking ([#15227](https://github.com/opensearch-project/OpenSearch/pull/15527))
- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426))
- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
---
# REST test: range queries on a date field return the expected hit counts and
# ordering, both unsorted and with an explicit sort on the date field.
"search with approximate range":
# Step 1: create index "test" with a single date field that is both indexed
# and has doc values.
- do:
indices.create:
index: test
body:
mappings:
properties:
date:
type: date
index: true
doc_values: true

# Step 2: bulk-index three documents (ids 1, 2, 3) dated 2018, 2020 and 2024,
# refreshing as part of the bulk request.
- do:
bulk:
index: test
refresh: true
body:
- '{"index": {"_index": "test", "_id": "1" }}'
- '{ "date": "2018-10-29T12:12:12.987Z" }'
- '{ "index": { "_index": "test", "_id": "2" }}'
- '{ "date": "2020-10-29T12:12:12.987Z" }'
- '{ "index": { "_index": "test", "_id": "3" } }'
- '{ "date": "2024-10-29T12:12:12.987Z" }'

# Step 3: open-ended range (gte the earliest date), no sort; all three
# documents are expected to match.
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
range: {
date: {
gte: "2018-10-29T12:12:12.987Z"
},
}

- match: { hits.total: 3 }

# Step 4: same open-ended range, sorted ascending by date; the oldest
# document (id 1) is expected first.
- do:
search:
rest_total_hits_as_int: true
index: test
body:
sort: [{ date: asc }]
query:
range: {
date: {
gte: "2018-10-29T12:12:12.987Z"
},
}


- match: { hits.total: 3 }
- match: { hits.hits.0._id: "1" }

# Step 5: range bounded on both sides (2018 through 2020), sorted descending;
# only ids 1 and 2 fall inside, and the newer of the two (id 2) is expected first.
- do:
search:
rest_total_hits_as_int: true
index: test
body:
sort: [{ date: desc }]
query:
range: {
date: {
gte: "2018-10-29T12:12:12.987Z",
lte: "2020-10-29T12:12:12.987Z"
},
}

- match: { hits.total: 2 }
- match: { hits.hits.0._id: "2" }
10 changes: 10 additions & 0 deletions server/src/main/java/org/opensearch/common/util/FeatureFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ public class FeatureFlags {
Property.NodeScope
);

/**
* Gates the functionality of ApproximatePointRangeQuery where we approximate query results.
* <p>
* {@code APPROXIMATE_POINT_RANGE_QUERY} is the setting key and
* {@code APPROXIMATE_POINT_RANGE_QUERY_SETTING} is the boolean setting registered under that key,
* declared with {@code false} as its default argument and {@link Property#NodeScope}, so the
* approximation is off unless the flag is explicitly enabled.
*/
public static final String APPROXIMATE_POINT_RANGE_QUERY = "opensearch.experimental.feature.approximate_point_range_query.enabled";
public static final Setting<Boolean> APPROXIMATE_POINT_RANGE_QUERY_SETTING = Setting.boolSetting(
APPROXIMATE_POINT_RANGE_QUERY,
false,
Property.NodeScope
);

private static final List<Setting<Boolean>> ALL_FEATURE_FLAG_SETTINGS = List.of(
REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING,
EXTENSIONS_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
import org.opensearch.index.query.QueryRewriteContext;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery;
import org.opensearch.search.approximate.ApproximatePointRangeQuery;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
Expand All @@ -80,6 +82,7 @@
import java.util.function.Supplier;

import static org.opensearch.common.time.DateUtils.toLong;
import static org.apache.lucene.document.LongPoint.pack;

/**
* A {@link FieldMapper} for dates.
Expand Down Expand Up @@ -108,6 +111,21 @@ public static DateFormatter getDefaultDateTimeFormatter() {
: LEGACY_DEFAULT_DATE_TIME_FORMATTER;
}

/**
 * Combines a point range query and a doc-values range query over the same bounds into one query.
 * <p>
 * When the {@link FeatureFlags#APPROXIMATE_POINT_RANGE_QUERY_SETTING} feature flag is disabled the
 * result is a plain {@link IndexOrDocValuesQuery}. When it is enabled the result is an
 * {@link ApproximateIndexOrDocValuesQuery} that additionally carries a single-dimension
 * {@link ApproximatePointRangeQuery} built from {@code name}, {@code l} and {@code u}.
 *
 * @param pointRangeQuery the index (points) based range query
 * @param dvQuery         the doc-values based range query
 * @param name            the field name used for the approximate point range query
 * @param l               the lower bound packed into the approximate query
 * @param u               the upper bound packed into the approximate query
 * @return the combined query, approximate or not depending on the feature flag
 */
public static Query getDefaultQuery(Query pointRangeQuery, Query dvQuery, String name, long l, long u) {
    // Feature flag off: fall back to the regular index-or-doc-values combination.
    if (FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY_SETTING) == false) {
        return new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
    }

    final byte[] lowerPoint = pack(l).bytes;
    final byte[] upperPoint = pack(u).bytes;
    // Exactly one long is packed per bound, so the query has a single dimension.
    final int numDims = 1;

    final ApproximatePointRangeQuery approximateQuery = new ApproximatePointRangeQuery(name, lowerPoint, upperPoint, numDims) {
        @Override
        protected String toString(int dimension, byte[] value) {
            // Always decodes from offset 0 of the supplied value, regardless of the dimension argument.
            return Long.toString(LongPoint.decodeDimension(value, 0));
        }
    };
    return new ApproximateIndexOrDocValuesQuery(pointRangeQuery, approximateQuery, dvQuery);
}

/**
* Resolution of the date time
*
Expand Down Expand Up @@ -463,24 +481,22 @@ public Query rangeQuery(
}
DateMathParser parser = forcedDateParser == null ? dateMathParser : forcedDateParser;
return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> {
Query pointRangeQuery = isSearchable() ? LongPoint.newRangeQuery(name(), l, u) : null;
Query dvQuery = hasDocValues() ? SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u) : null;
if (isSearchable() && hasDocValues()) {
Query query = LongPoint.newRangeQuery(name(), l, u);
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
query = new IndexOrDocValuesQuery(query, dvQuery);

Query query = getDefaultQuery(pointRangeQuery, dvQuery, name(), l, u);
if (context.indexSortedOnField(name())) {
query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
}
return query;
}
if (hasDocValues()) {
Query query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
if (context.indexSortedOnField(name())) {
query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, dvQuery);
}
return query;
return dvQuery;
}
return LongPoint.newRangeQuery(name(), l, u);
return pointRangeQuery;
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.opensearch.common.lucene.search.function.FunctionScoreQuery;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.query.DateRangeIncludingNowQuery;
import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery;
import org.opensearch.search.internal.SearchContext;

import java.io.IOException;
Expand Down Expand Up @@ -54,6 +55,7 @@ private Helper() {}
queryWrappers.put(FunctionScoreQuery.class, q -> ((FunctionScoreQuery) q).getSubQuery());
queryWrappers.put(DateRangeIncludingNowQuery.class, q -> ((DateRangeIncludingNowQuery) q).getQuery());
queryWrappers.put(IndexOrDocValuesQuery.class, q -> ((IndexOrDocValuesQuery) q).getIndexQuery());
queryWrappers.put(ApproximateIndexOrDocValuesQuery.class, q -> ((ApproximateIndexOrDocValuesQuery) q).getOriginalQuery());
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.approximate;

import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

/**
 * A wrapper around {@link IndexOrDocValuesQuery} that can be used to run approximate queries.
 * It delegates to either {@link ApproximateQuery} or {@link IndexOrDocValuesQuery} based on whether the query can be approximated or not.
 * <p>
 * Two instances are equal when their wrapped index queries and their wrapped doc-values
 * (random access) queries are equal; {@link #hashCode()} is derived from the same two components.
 *
 * @see ApproximateQuery
 */
public final class ApproximateIndexOrDocValuesQuery extends ApproximateScoreQuery {

    private final ApproximateQuery approximateIndexQuery;
    private final IndexOrDocValuesQuery indexOrDocValuesQuery;

    /**
     * @param indexQuery            the index-structure based query
     * @param approximateIndexQuery the approximate counterpart handed to the superclass
     * @param dvQuery               the doc-values based query
     */
    public ApproximateIndexOrDocValuesQuery(Query indexQuery, ApproximateQuery approximateIndexQuery, Query dvQuery) {
        super(new IndexOrDocValuesQuery(indexQuery, dvQuery), approximateIndexQuery);
        this.approximateIndexQuery = approximateIndexQuery;
        // A second wrapper is built for local use (toString/visit/equals/hashCode); the one
        // passed to super(...) is created inline in the superclass constructor call.
        this.indexOrDocValuesQuery = new IndexOrDocValuesQuery(indexQuery, dvQuery);
    }

    @Override
    public String toString(String field) {
        return "ApproximateIndexOrDocValuesQuery(indexQuery="
            + indexOrDocValuesQuery.getIndexQuery().toString(field)
            + ", approximateIndexQuery="
            + approximateIndexQuery.toString(field)
            + ", dvQuery="
            + indexOrDocValuesQuery.getRandomAccessQuery().toString(field)
            + ")";
    }

    @Override
    public void visit(QueryVisitor visitor) {
        indexOrDocValuesQuery.visit(visitor);
    }

    /**
     * Compares the wrapped index query and doc-values query of both instances.
     * <p>
     * A class-only check is not sufficient: it would make every instance of this class equal to
     * every other one, regardless of field or bounds, while {@link #hashCode()} distinguishes them.
     */
    @Override
    public boolean equals(Object obj) {
        if (sameClassAs(obj) == false) {
            return false;
        }
        final ApproximateIndexOrDocValuesQuery other = (ApproximateIndexOrDocValuesQuery) obj;
        // Compare exactly the components that hashCode() folds in, so equal objects hash alike.
        return indexOrDocValuesQuery.getIndexQuery().equals(other.indexOrDocValuesQuery.getIndexQuery())
            && indexOrDocValuesQuery.getRandomAccessQuery().equals(other.indexOrDocValuesQuery.getRandomAccessQuery());
    }

    @Override
    public int hashCode() {
        int h = classHash();
        h = 31 * h + indexOrDocValuesQuery.getIndexQuery().hashCode();
        h = 31 * h + indexOrDocValuesQuery.getRandomAccessQuery().hashCode();
        return h;
    }
}
Loading

0 comments on commit 2e9db40

Please sign in to comment.