Skip to content

Commit

Permalink
take field density into consideration
Browse files Browse the repository at this point in the history
Signed-off-by: panguixin <panguixin@bytedance.com>
  • Loading branch information
bugmakerrrrrr committed Aug 7, 2024
1 parent 47f5073 commit 3a8d66e
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 77 deletions.
21 changes: 15 additions & 6 deletions server/src/main/java/org/opensearch/search/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
import org.opensearch.search.rescore.RescorerBuilder;
import org.opensearch.search.searchafter.SearchAfterBuilder;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.FieldStats;
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.SortAndFormats;
import org.opensearch.search.sort.SortBuilder;
Expand Down Expand Up @@ -1627,7 +1628,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
final SortAndFormats primarySort = sortBuilder != null
? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get()
: null;
MinAndMax<?> minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
FieldStats stats = sortBuilder != null ? FieldSortBuilder.getFieldStatsForShard(context, sortBuilder) : FieldStats.UNKNOWN;
boolean canMatch;
if (canRewriteToMatchNone(request.source())) {
QueryBuilder queryBuilder = request.source().query();
Expand All @@ -1638,9 +1639,16 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
}
final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context);
final Integer trackTotalHitsUpto = request.source() == null ? null : request.source().trackTotalHitsUpTo();
canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, primarySort, trackTotalHitsUpto);
canMatch = canMatch
&& canMatchSearchAfter(
searchAfterFieldDoc,
stats.getMinAndMax(),
primarySort,
trackTotalHitsUpto,
stats.allDocsNonMissing()
);

return new CanMatchResponse(canMatch || hasRefreshPending, minMax);
return new CanMatchResponse(canMatch || hasRefreshPending, stats.getMinAndMax());
}
}
}
Expand All @@ -1649,7 +1657,8 @@ public static boolean canMatchSearchAfter(
FieldDoc searchAfter,
MinAndMax<?> minMax,
SortAndFormats primarySort,
Integer trackTotalHitsUpto
Integer trackTotalHitsUpto,
boolean allDocsNonMissing
) {
// Check for sort.missing == null, since in case of missing values sort queries, if segment/shard's min/max
// is out of search_after range, it still should be printed and hence we should not skip segment/shard.
Expand All @@ -1665,12 +1674,12 @@ public static boolean canMatchSearchAfter(
if (primarySortField.getReverse()) {
if (minMax.compareMin(searchAfterPrimary) > 0) {
// In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive
return canMatchMissingValue(primarySortField, searchAfterPrimary);
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
}
} else {
if (minMax.compareMax(searchAfterPrimary) < 0) {
// In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive
return canMatchMissingValue(primarySortField, searchAfterPrimary);
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
import org.opensearch.search.query.QueryPhase;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.FieldStats;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -517,17 +517,19 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException {
// Only applied on primary sort field and primary search_after.
FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source());
if (primarySortField != null) {
MinAndMax<?> minMax = FieldSortBuilder.getMinMaxOrNullForSegment(
FieldStats stats = FieldSortBuilder.getFieldStatsForSegment(
this.searchContext.getQueryShardContext(),
ctx,
primarySortField,
searchContext.sort()
);
assert stats != null;
return SearchService.canMatchSearchAfter(
searchContext.searchAfter(),
minMax,
stats.getMinAndMax(),
searchContext.sort(),
searchContext.trackTotalHitsUpTo()
searchContext.trackTotalHitsUpTo(),
stats.allDocsNonMissing()
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -606,73 +606,77 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou
}

/**
* Return the {@link MinAndMax} indexed value for shard from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
* Return the {@link FieldStats} of the indexed values for the shard, derived from the provided {@link FieldSortBuilder}, or {@link FieldStats#UNKNOWN} if unknown.
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
* and configurations return <code>null</code>.
* and configurations return {@link FieldStats#UNKNOWN}.
*/
public static MinAndMax<?> getMinMaxOrNull(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
public static FieldStats getFieldStatsForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
final SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get();
return getMinMaxOrNullInternal(context.getIndexReader(), context, sortBuilder, sort);
return getFieldStatsInternal(context.getIndexReader(), context, sortBuilder, sort);
}

/**
* Return the {@link MinAndMax} indexed value for segment from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
* Return the {@link FieldStats} of the indexed values for the segment, derived from the provided {@link FieldSortBuilder}, or {@link FieldStats#UNKNOWN} if unknown.
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
* and configurations return <code>null</code>.
* and configurations return {@link FieldStats#UNKNOWN}.
*/
public static MinAndMax<?> getMinMaxOrNullForSegment(
public static FieldStats getFieldStatsForSegment(
QueryShardContext context,
LeafReaderContext ctx,
FieldSortBuilder sortBuilder,
SortAndFormats sort
) throws IOException {
return getMinMaxOrNullInternal(ctx.reader(), context, sortBuilder, sort);
return getFieldStatsInternal(ctx.reader(), context, sortBuilder, sort);
}

private static MinAndMax<?> getMinMaxOrNullInternal(
private static FieldStats getFieldStatsInternal(
IndexReader reader,
QueryShardContext context,
FieldSortBuilder sortBuilder,
SortAndFormats sort
) throws IOException {
SortField sortField = sort.sort.getSort()[0];
if (sortField.getField() == null) {
return null;
return FieldStats.UNKNOWN;
}
MappedFieldType fieldType = context.fieldMapper(sortField.getField());
if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) {
return null;
return FieldStats.UNKNOWN;
}
switch (IndexSortConfig.getSortFieldType(sortField)) {
case LONG:
case INT:
case DOUBLE:
case FLOAT:
return extractNumericMinAndMax(reader, sortField, fieldType, sortBuilder);
return extractNumericFieldStats(reader, sortField, fieldType, sortBuilder);
case STRING:
case STRING_VAL:
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
Terms terms = MultiTerms.getTerms(reader, fieldType.name());
if (terms == null) {
return null;
return FieldStats.UNKNOWN;
}
return terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
MinAndMax<?> minAndMax = terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
return new FieldStats(minAndMax, terms.getDocCount() == reader.maxDoc());
}
break;
}
return null;
return FieldStats.UNKNOWN;
}

private static MinAndMax<?> extractNumericMinAndMax(
private static FieldStats extractNumericFieldStats(
IndexReader reader,
SortField sortField,
MappedFieldType fieldType,
FieldSortBuilder sortBuilder
) throws IOException {
String fieldName = fieldType.name();
if (PointValues.size(reader, fieldName) == 0) {
return null;
final int docCount = PointValues.getDocCount(reader, fieldName);
if (docCount == 0) {
return FieldStats.UNKNOWN;
}
final boolean allDocsNonMissing = docCount == reader.maxDoc();
MinAndMax<?> minAndMax = null;
if (fieldType instanceof NumberFieldType) {
NumberFieldType numberFieldType = (NumberFieldType) fieldType;
Number minPoint = numberFieldType.parsePoint(PointValues.getMinPackedValue(reader, fieldName));
Expand All @@ -681,27 +685,31 @@ private static MinAndMax<?> extractNumericMinAndMax(
case LONG:
if (numberFieldType.numericType() == NumericType.UNSIGNED_LONG) {
// The min and max are expected to be BigInteger numbers
return new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
minAndMax = new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
} else {
return new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
}
break;
case INT:
return new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
minAndMax = new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
break;
case DOUBLE:
return new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
minAndMax = new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
break;
case FLOAT:
return new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
minAndMax = new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
break;
default:
return null;
// no-op
}
} else if (fieldType instanceof DateFieldType) {
DateFieldType dateFieldType = (DateFieldType) fieldType;
Function<byte[], Long> dateConverter = createDateConverter(sortBuilder, dateFieldType);
Long min = dateConverter.apply(PointValues.getMinPackedValue(reader, fieldName));
Long max = dateConverter.apply(PointValues.getMaxPackedValue(reader, fieldName));
return new MinAndMax<>(min, max);
minAndMax = new MinAndMax<>(min, max);
}
return null;
return new FieldStats(minAndMax, allDocsNonMissing);
}

private static Function<byte[], Long> createDateConverter(FieldSortBuilder sortBuilder, DateFieldType dateFieldType) {
Expand Down
40 changes: 40 additions & 0 deletions server/src/main/java/org/opensearch/search/sort/FieldStats.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.sort;

/**
 * Immutable holder for statistics about a single field: its minimum/maximum values
 * and whether every document carries a value for that field.
 *
 * @opensearch.internal
 */
public class FieldStats {
    /**
     * Sentinel used when nothing is known about the field: it has no min/max and
     * the documents are not known to all have a value.
     */
    public static final FieldStats UNKNOWN = new FieldStats(null, false);

    private final MinAndMax<?> minAndMax;
    private final boolean allDocsNonMissing;

    /**
     * Creates a new stats holder.
     *
     * @param minAndMax         the minimum and maximum value of the field, or {@code null} when unavailable
     * @param allDocsNonMissing {@code true} when all docs have a value for the field
     */
    public FieldStats(MinAndMax<?> minAndMax, boolean allDocsNonMissing) {
        this.allDocsNonMissing = allDocsNonMissing;
        this.minAndMax = minAndMax;
    }

    /**
     * Returns the minimum and maximum value of the field.
     *
     * @return the min/max pair; {@code null} for {@link #UNKNOWN} or whenever the
     *         instance was constructed without one
     */
    public MinAndMax<?> getMinAndMax() {
        return this.minAndMax;
    }

    /**
     * Tells whether all docs have a value for the corresponding field.
     *
     * @return {@code true} if no doc is missing a value for the field
     */
    public boolean allDocsNonMissing() {
        return this.allDocsNonMissing;
    }
}
Loading

0 comments on commit 3a8d66e

Please sign in to comment.