From 16b7c191824af1026100c1d21a1946464e3687d1 Mon Sep 17 00:00:00 2001 From: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com> Date: Sun, 17 Dec 2023 20:02:43 +0100 Subject: [PATCH] add support for scored named queries Signed-off-by: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com> --- CHANGELOG.md | 1 + .../core/common/io/stream/StreamInput.java | 71 +++++++++ .../resources/rest-api-spec/api/search.json | 5 + .../test/search/350_matched_queries.yml | 103 ++++++++++++ .../fetch/subphase/MatchedQueriesIT.java | 96 ++++++----- .../search/functionscore/QueryRescorerIT.java | 2 +- .../rest/action/search/RestSearchAction.java | 6 +- .../java/org/opensearch/search/SearchHit.java | 137 +++++++++------- .../fetch/subphase/MatchedQueriesPhase.java | 32 ++-- .../common/io/stream/BytesStreamsTests.java | 34 ++++ .../org/opensearch/search/SearchHitTests.java | 150 +++++++++++++++++- 11 files changed, 522 insertions(+), 115 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/350_matched_queries.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index c5ea3f83bffc7..e10e795fbec70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -121,6 +121,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Adding slf4j license header to LoggerMessageFormat.java ([#11069](https://github.com/opensearch-project/OpenSearch/pull/11069)) - [BWC and API enforcement] Introduce checks for enforcing the API restrictions ([#11175](https://github.com/opensearch-project/OpenSearch/pull/11175)) - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170)) +- Backwards compatible support for returning scores in matched queries ((https://github.com/opensearch-project/OpenSearch/pull/11549)) ### Dependencies - Bump Lucene from 9.7.0 to 9.8.0 ([10276](https://github.com/opensearch-project/OpenSearch/pull/10276)) diff --git a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java index 3e996bdee83a2..ac4e0f50b924c 100644 --- a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java +++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java @@ -632,6 +632,77 @@ public String[] readOptionalStringArray() throws IOException { return null; } + + /** + * Reads an ordered {@link Map} from a data source using provided key and value readers. + * + * This method creates a `LinkedHashMap`, ensuring that the iteration order of the map + * matches the order in which the entries were read from the data source. It uses the + * `keyReader` and `valueReader` to read each key-value pair and constructs the map + * with an initial capacity optimized based on the expected size. + * + * Usage example: + *

+     * Map<Integer, String> orderedMap = readOrderedMap(StreamInput::readInteger, StreamInput::readString);
+     * 
+ * + * @param keyReader The {@link Writeable.Reader} used to read the keys of the map. + * @param valueReader The {@link Writeable.Reader} used to read the values of the map. + * @return A {@link LinkedHashMap} containing the keys and values read from the data source. + * The map maintains the order in which keys and values were read. + * @throws IOException If an I/O error occurs during reading from the data source. + */ + public Map readOrderedMap(Writeable.Reader keyReader, Writeable.Reader valueReader) throws IOException { + return readMap(keyReader, valueReader, (expectedSize) -> new LinkedHashMap<>(capacity(expectedSize))); + } + + static int capacity(int expectedSize) { + assert expectedSize >= 0; + return expectedSize < 2 ? expectedSize + 1 : (int) (expectedSize / 0.75 + 1.0); + } + + /** + * Reads a {@link Map} from a data source using provided key and value readers and a map constructor. + * + * This method is a flexible utility for reading a map from a data source, such as a file or network stream. + * It uses the provided `keyReader` and `valueReader` to read each key-value pair. The size of the map is + * determined first by reading the array size. A map constructor is also provided to create a map of the + * appropriate size, allowing for optimized performance based on the expected number of entries. + * + * If the read size is zero, an empty map is returned. Otherwise, the method iterates over the size, + * reading keys and values and adding them to the map. + * + * Usage example: + *

+     * Map<Integer, String> map = readMap(StreamInput::readInteger, StreamInput::readString, HashMap::new);
+     * 
+ * + * Note: If the map is empty, an immutable empty map is returned. Otherwise, a mutable map is created. + * + * @param keyReader The {@link Writeable.Reader} used to read the keys of the map. + * @param valueReader The {@link Writeable.Reader} used to read the values of the map. + * @param constructor A function that takes an integer (the initial size) and returns a new map. + * This allows for the creation of a map with an initial capacity matching the + * expected size, optimizing performance. + * @return A {@link Map} containing the keys and values read from the data source. This map is either + * empty (and immutable) or mutable and filled with the read data. + * @throws IOException If an I/O error occurs during reading from the data source. + */ + private Map readMap(Writeable.Reader keyReader, Writeable.Reader valueReader, IntFunction> constructor) + throws IOException { + int size = readArraySize(); + if (size == 0) { + return Collections.emptyMap(); + } + Map map = constructor.apply(size); + for (int i = 0; i < size; i++) { + K key = keyReader.read(this); + V value = valueReader.read(this); + map.put(key, value); + } + return map; + } + /** * If the returned map contains any entries it will be mutable. If it is empty it might be immutable. */ diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/search.json b/rest-api-spec/src/main/resources/rest-api-spec/api/search.json index e0fbeeb83ffc4..e78d49a67a98a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/search.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/search.json @@ -229,6 +229,11 @@ "search_pipeline": { "type": "string", "description": "The search pipeline to use to execute this request" + }, + "include_named_queries_score":{ + "type": "boolean", + "description":"Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false)", + "default":false } }, "body":{ diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_matched_queries.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_matched_queries.yml new file mode 100644 index 0000000000000..25de51a316bd4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_matched_queries.yml @@ -0,0 +1,103 @@ +setup: + - skip: + version: " - 2.12.0" + reason: "implemented for versions post 2.12.0" + +--- +"matched queries": + - do: + indices.create: + index: test + + - do: + bulk: + refresh: true + body: + - '{ "index" : { "_index" : "test_1", "_id" : "1" } }' + - '{"field" : 1 }' + - '{ "index" : { "_index" : "test_1", "_id" : "2" } }' + - '{"field" : [1, 2] }' + + - do: + search: + index: test_1 + body: + query: + bool: { + should: [ + { + match: { + field: { + query: 1, + _name: match_field_1 + } + } + }, + { + match: { + field: { + query: 2, + _name: match_field_2, + boost: 10 + } + } + } + ] + } + + - match: {hits.total.value: 2} + - length: {hits.hits.0.matched_queries: 2} + - match: {hits.hits.0.matched_queries: [ "match_field_1", "match_field_2" ]} + - length: {hits.hits.1.matched_queries: 1} + - match: {hits.hits.1.matched_queries: [ "match_field_1" ]} + +--- + +"matched queries with scores": + - do: + indices.create: + index: test + + - do: + bulk: + refresh: true + body: + - '{ "index" : { "_index" : "test_1", "_id" : "1" } }' + - '{"field" : 1 }' + - '{ "index" : { "_index" : "test_1", "_id" : "2" } }' + - '{"field" : [1, 2] }' + + - do: + search: + include_named_queries_score: true + index: test_1 + body: + query: + bool: { + should: [ + { + match: { + field: { + query: 1, + _name: match_field_1 + } + } + }, + { + match: { + field: { + query: 2, + _name: match_field_2, + boost: 10 + } + } + } + ] + } + + - match: { hits.total.value: 2 } + - length: { hits.hits.0.matched_queries: 2 } + - match: { hits.hits.0.matched_queries.match_field_1: 1 } + - match: { hits.hits.0.matched_queries.match_field_2: 10 } + - length: { hits.hits.1.matched_queries: 1 } + - match: { hits.hits.1.matched_queries.match_field_1: 1 } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/MatchedQueriesIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/MatchedQueriesIT.java index 83cedb8c20e1d..36d3d2cfa59ed 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/MatchedQueriesIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/MatchedQueriesIT.java @@ -62,7 +62,9 @@ import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItemInArray; +import static org.hamcrest.Matchers.hasKey; public class MatchedQueriesIT extends ParameterizedOpenSearchIntegTestCase { @@ -105,11 +107,13 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception { assertHitCount(searchResponse, 3L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("3") || hit.getId().equals("2")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("test2")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2")); + assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f)); } else if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("test1")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1")); + assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -123,11 +127,13 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception { assertHitCount(searchResponse, 3L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1") || hit.getId().equals("2")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("test1")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1")); + assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f)); } else if (hit.getId().equals("3")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("test2")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2")); + assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -153,12 +159,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception { assertHitCount(searchResponse, 3L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(2)); - assertThat(hit.getMatchedQueries(), hasItemInArray("name")); - assertThat(hit.getMatchedQueries(), hasItemInArray("title")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); + assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); } else if (hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("name")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -174,12 +183,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception { assertHitCount(searchResponse, 3L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(2)); - assertThat(hit.getMatchedQueries(), hasItemInArray("name")); - assertThat(hit.getMatchedQueries(), hasItemInArray("title")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); + assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); } else if (hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("name")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -203,9 +215,11 @@ public void testSimpleMatchedQueryFromTopLevelFilterAndFilteredQuery() throws Ex assertHitCount(searchResponse, 3L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueries().length, equalTo(2)); - assertThat(hit.getMatchedQueries(), hasItemInArray("name")); - assertThat(hit.getMatchedQueries(), hasItemInArray("title")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); + assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -242,8 +256,9 @@ public void testRegExpQuerySupportsName() throws InterruptedException { for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("regex")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("regex")); + assertThat(hit.getMatchedQueryScore("regex"), equalTo(1f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -265,8 +280,9 @@ public void testPrefixQuerySupportsName() throws InterruptedException { for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("prefix")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("prefix")); + assertThat(hit.getMatchedQueryScore("prefix"), equalTo(1f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -288,8 +304,9 @@ public void testFuzzyQuerySupportsName() throws InterruptedException { for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("fuzzy")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("fuzzy")); + assertThat(hit.getMatchedQueryScore("fuzzy"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -311,8 +328,9 @@ public void testWildcardQuerySupportsName() throws InterruptedException { for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("wildcard")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("wildcard")); + assertThat(hit.getMatchedQueryScore("wildcard"), equalTo(1f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -334,8 +352,9 @@ public void testSpanFirstQuerySupportsName() throws InterruptedException { for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("span")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("span")); + assertThat(hit.getMatchedQueryScore("span"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -369,11 +388,13 @@ public void testMatchedWithShould() throws Exception { assertHitCount(searchResponse, 2L); for (SearchHit hit : searchResponse.getHits()) { if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("dolor")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("dolor")); + assertThat(hit.getMatchedQueryScore("dolor"), greaterThan(0f)); } else if (hit.getId().equals("2")) { - assertThat(hit.getMatchedQueries().length, equalTo(1)); - assertThat(hit.getMatchedQueries(), hasItemInArray("elit")); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("elit")); + assertThat(hit.getMatchedQueryScore("elit"), greaterThan(0f)); } else { fail("Unexpected document returned with id " + hit.getId()); } @@ -397,7 +418,10 @@ public void testMatchedWithWrapperQuery() throws Exception { for (QueryBuilder query : queries) { SearchResponse searchResponse = client().prepareSearch().setQuery(query).get(); assertHitCount(searchResponse, 1L); - assertThat(searchResponse.getHits().getAt(0).getMatchedQueries()[0], equalTo("abc")); + SearchHit hit = searchResponse.getHits().getAt(0); + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("abc")); + assertThat(hit.getMatchedQueryScore("abc"), greaterThan(0f)); } } } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/functionscore/QueryRescorerIT.java b/server/src/internalClusterTest/java/org/opensearch/search/functionscore/QueryRescorerIT.java index bda6284d9535a..a7faad63e759a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/functionscore/QueryRescorerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/functionscore/QueryRescorerIT.java @@ -128,7 +128,7 @@ public void testEnforceWindowSize() throws InterruptedException { new QueryRescorerBuilder( functionScoreQuery(matchAllQuery(), ScoreFunctionBuilders.weightFactorFunction(100)).boostMode( CombineFunction.REPLACE - ) + ).queryName("hello world") ).setQueryWeight(0.0f).setRescoreQueryWeight(1.0f), 1 ) diff --git a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java index 080366e536da1..9b1bea362ffae 100644 --- a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java @@ -86,10 +86,13 @@ public class RestSearchAction extends BaseRestHandler { */ public static final String TOTAL_HITS_AS_INT_PARAM = "rest_total_hits_as_int"; public static final String TYPED_KEYS_PARAM = "typed_keys"; + public static final String INCLUDE_NAMED_QUERIES_SCORE_PARAM = "include_named_queries_score"; private static final Set RESPONSE_PARAMS; static { - final Set responseParams = new HashSet<>(Arrays.asList(TYPED_KEYS_PARAM, TOTAL_HITS_AS_INT_PARAM)); + final Set responseParams = new HashSet<>( + Arrays.asList(TYPED_KEYS_PARAM, TOTAL_HITS_AS_INT_PARAM, INCLUDE_NAMED_QUERIES_SCORE_PARAM) + ); RESPONSE_PARAMS = Collections.unmodifiableSet(responseParams); } @@ -209,6 +212,7 @@ public static void parseSearchRequest( searchRequest.pipeline(request.param("search_pipeline")); checkRestTotalHits(request, searchRequest); + request.paramAsBoolean(INCLUDE_NAMED_QUERIES_SCORE_PARAM, false); if (searchRequest.pointInTimeBuilder() != null) { preparePointInTime(searchRequest, request, namedWriteableRegistry); diff --git a/server/src/main/java/org/opensearch/search/SearchHit.java b/server/src/main/java/org/opensearch/search/SearchHit.java index 10e65fca3afb5..58b57b24a4bab 100644 --- a/server/src/main/java/org/opensearch/search/SearchHit.java +++ b/server/src/main/java/org/opensearch/search/SearchHit.java @@ -64,16 +64,17 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.SourceFieldMapper; import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.rest.action.search.RestSearchAction; import org.opensearch.search.fetch.subphase.highlight.HighlightField; import org.opensearch.search.lookup.SourceLookup; import org.opensearch.transport.RemoteClusterAware; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -120,7 +121,7 @@ public final class SearchHit implements Writeable, ToXContentObject, Iterable matchedQueries = Collections.emptyMap(); private Explanation explanation; @@ -203,10 +204,14 @@ public SearchHit(StreamInput in) throws IOException { sortValues = new SearchSortValues(in); size = in.readVInt(); - if (size > 0) { - matchedQueries = new String[size]; + if (in.getVersion().after(Version.V_2_12_0)) { + if (size > 0) { + matchedQueries = in.readOrderedMap(StreamInput::readString, StreamInput::readFloat); + } + } else { + matchedQueries = new LinkedHashMap<>(size); for (int i = 0; i < size; i++) { - matchedQueries[i] = in.readString(); + matchedQueries.put(in.readString(), Float.NaN); } } // we call the setter here because that also sets the local index parameter @@ -224,36 +229,6 @@ public SearchHit(StreamInput in) throws IOException { } } - private Map readFields(StreamInput in) throws IOException { - Map fields; - int size = in.readVInt(); - if (size == 0) { - fields = emptyMap(); - } else if (size == 1) { - DocumentField hitField = new DocumentField(in); - fields = singletonMap(hitField.getName(), hitField); - } else { - fields = new HashMap<>(size); - for (int i = 0; i < size; i++) { - DocumentField field = new DocumentField(in); - fields.put(field.getName(), field); - } - fields = unmodifiableMap(fields); - } - return fields; - } - - private void writeFields(StreamOutput out, Map fields) throws IOException { - if (fields == null) { - out.writeVInt(0); - } else { - out.writeVInt(fields.size()); - for (DocumentField field : fields.values()) { - field.writeTo(out); - } - } - } - private static final Text SINGLE_MAPPING_TYPE = new Text(MapperService.SINGLE_MAPPING_NAME); @Override @@ -286,11 +261,13 @@ public void writeTo(StreamOutput out) throws IOException { } sortValues.writeTo(out); - if (matchedQueries.length == 0) { - out.writeVInt(0); + out.writeVInt(matchedQueries.size()); + if (out.getVersion().after(Version.V_2_12_0)) { + if (!matchedQueries.isEmpty()) { + out.writeMap(matchedQueries, StreamOutput::writeString, StreamOutput::writeFloat); + } } else { - out.writeVInt(matchedQueries.length); - for (String matchedFilter : matchedQueries) { + for (String matchedFilter : matchedQueries.keySet()) { out.writeString(matchedFilter); } } @@ -458,11 +435,11 @@ public DocumentField field(String fieldName) { } /* - * Adds a new DocumentField to the map in case both parameters are not null. - * */ + * Adds a new DocumentField to the map in case both parameters are not null. + * */ public void setDocumentField(String fieldName, DocumentField field) { if (fieldName == null || field == null) return; - if (documentFields.size() == 0) this.documentFields = new HashMap<>(); + if (documentFields.isEmpty()) this.documentFields = new HashMap<>(); this.documentFields.put(fieldName, field); } @@ -475,7 +452,7 @@ public DocumentField removeDocumentField(String fieldName) { * were required to be loaded. */ public Map getFields() { - if (metaFields.size() > 0 || documentFields.size() > 0) { + if (!metaFields.isEmpty() || !documentFields.isEmpty()) { final Map fields = new HashMap<>(); fields.putAll(metaFields); fields.putAll(documentFields); @@ -559,7 +536,7 @@ public String getClusterAlias() { return clusterAlias; } - public void matchedQueries(String[] matchedQueries) { + public void matchedQueries(Map matchedQueries) { this.matchedQueries = matchedQueries; } @@ -567,7 +544,21 @@ public void matchedQueries(String[] matchedQueries) { * The set of query and filter names the query matched with. Mainly makes sense for compound filters and queries. */ public String[] getMatchedQueries() { - return this.matchedQueries; + return matchedQueries == null ? new String[0] : matchedQueries.keySet().toArray(new String[0]); + } + + /** + * @return The score of the provided named query if it matches, {@code null} otherwise. + */ + public Float getMatchedQueryScore(String name) { + return getMatchedQueriesAndScores().get(name); + } + + /** + * @return The map of the named queries that matched and their associated score. + */ + public Map getMatchedQueriesAndScores() { + return matchedQueries == null ? Collections.emptyMap() : matchedQueries; } /** @@ -654,7 +645,7 @@ public XContentBuilder toInnerXContent(XContentBuilder builder, Params params) t for (DocumentField field : metaFields.values()) { // ignore empty metadata fields - if (field.getValues().size() == 0) { + if (field.getValues().isEmpty()) { continue; } // _ignored is the only multi-valued meta field @@ -670,10 +661,10 @@ public XContentBuilder toInnerXContent(XContentBuilder builder, Params params) t } if (documentFields.isEmpty() == false && // ignore fields all together if they are all empty - documentFields.values().stream().anyMatch(df -> df.getValues().size() > 0)) { + documentFields.values().stream().anyMatch(df -> !df.getValues().isEmpty())) { builder.startObject(Fields.FIELDS); for (DocumentField field : documentFields.values()) { - if (field.getValues().size() > 0) { + if (!field.getValues().isEmpty()) { field.toXContent(builder, params); } } @@ -687,12 +678,21 @@ public XContentBuilder toInnerXContent(XContentBuilder builder, Params params) t builder.endObject(); } sortValues.toXContent(builder, params); - if (matchedQueries.length > 0) { - builder.startArray(Fields.MATCHED_QUERIES); - for (String matchedFilter : matchedQueries) { - builder.value(matchedFilter); + if (matchedQueries != null && !matchedQueries.isEmpty()) { + boolean includeMatchedQueriesScore = params.paramAsBoolean(RestSearchAction.INCLUDE_NAMED_QUERIES_SCORE_PARAM, false); + if (includeMatchedQueriesScore) { + builder.startObject(Fields.MATCHED_QUERIES); + for (Map.Entry entry : matchedQueries.entrySet()) { + builder.field(entry.getKey(), entry.getValue()); + } + builder.endObject(); + } else { + builder.startArray(Fields.MATCHED_QUERIES); + for (String matchedFilter : matchedQueries.keySet()) { + builder.value(matchedFilter); + } + builder.endArray(); } - builder.endArray(); } if (getExplanation() != null) { builder.field(Fields._EXPLANATION); @@ -797,7 +797,25 @@ public static void declareInnerHitsParseFields(ObjectParser, (p, c) -> parseInnerHits(p), new ParseField(Fields.INNER_HITS) ); - parser.declareStringArray((map, list) -> map.put(Fields.MATCHED_QUERIES, list), new ParseField(Fields.MATCHED_QUERIES)); + parser.declareField((p, map, context) -> { + XContentParser.Token token = p.currentToken(); + Map matchedQueries = new LinkedHashMap<>(); + if (token == XContentParser.Token.START_OBJECT) { + String fieldName = null; + while ((token = p.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = p.currentName(); + } else if (token.isValue()) { + matchedQueries.put(fieldName, p.floatValue()); + } + } + } else if (token == XContentParser.Token.START_ARRAY) { + while (p.nextToken() != XContentParser.Token.END_ARRAY) { + matchedQueries.put(p.text(), Float.NaN); + } + } + map.put(Fields.MATCHED_QUERIES, matchedQueries); + }, new ParseField(Fields.MATCHED_QUERIES), ObjectParser.ValueType.OBJECT_ARRAY); parser.declareField( (map, list) -> map.put(Fields.SORT, list), SearchSortValues::fromXContent, @@ -828,7 +846,7 @@ public static SearchHit createFromMap(Map values) { assert shardId.getIndexName().equals(index); searchHit.shard(new SearchShardTarget(nodeId, shardId, clusterAlias, OriginalIndices.NONE)); } else { - // these fields get set anyways when setting the shard target, + // these fields get set anyway when setting the shard target, // but we set them explicitly when we don't have enough info to rebuild the shard target searchHit.index = index; searchHit.clusterAlias = clusterAlias; @@ -842,10 +860,7 @@ public static SearchHit createFromMap(Map values) { searchHit.sourceRef(get(SourceFieldMapper.NAME, values, null)); searchHit.explanation(get(Fields._EXPLANATION, values, null)); searchHit.setInnerHits(get(Fields.INNER_HITS, values, null)); - List matchedQueries = get(Fields.MATCHED_QUERIES, values, null); - if (matchedQueries != null) { - searchHit.matchedQueries(matchedQueries.toArray(new String[0])); - } + searchHit.matchedQueries(get(Fields.MATCHED_QUERIES, values, null)); return searchHit; } @@ -965,7 +980,7 @@ public boolean equals(Object obj) { && Objects.equals(documentFields, other.documentFields) && Objects.equals(metaFields, other.metaFields) && Objects.equals(getHighlightFields(), other.getHighlightFields()) - && Arrays.equals(matchedQueries, other.matchedQueries) + && Objects.equals(getMatchedQueriesAndScores(), other.getMatchedQueriesAndScores()) && Objects.equals(explanation, other.explanation) && Objects.equals(shard, other.shard) && Objects.equals(innerHits, other.innerHits) @@ -985,7 +1000,7 @@ public int hashCode() { documentFields, metaFields, getHighlightFields(), - Arrays.hashCode(matchedQueries), + getMatchedQueriesAndScores(), explanation, shard, innerHits, diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/MatchedQueriesPhase.java b/server/src/main/java/org/opensearch/search/fetch/subphase/MatchedQueriesPhase.java index 6c589438d6b4c..6734bc11fd5c0 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/MatchedQueriesPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/MatchedQueriesPhase.java @@ -34,18 +34,16 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.Bits; -import org.opensearch.common.lucene.Lucene; import org.opensearch.search.fetch.FetchContext; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.FetchSubPhaseProcessor; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; +import java.util.LinkedHashMap; import java.util.Map; /** @@ -71,12 +69,12 @@ public FetchSubPhaseProcessor getProcessor(FetchContext context) throws IOExcept for (Map.Entry entry : namedQueries.entrySet()) { weights.put( entry.getKey(), - context.searcher().createWeight(context.searcher().rewrite(entry.getValue()), ScoreMode.COMPLETE_NO_SCORES, 1) + context.searcher().createWeight(context.searcher().rewrite(entry.getValue()), ScoreMode.COMPLETE, 1) ); } return new FetchSubPhaseProcessor() { - final Map matchingIterators = new HashMap<>(); + final Map matchingIterators = new HashMap<>(); @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { @@ -84,22 +82,28 @@ public void setNextReader(LeafReaderContext readerContext) throws IOException { for (Map.Entry entry : weights.entrySet()) { ScorerSupplier ss = entry.getValue().scorerSupplier(readerContext); if (ss != null) { - Bits matchingBits = Lucene.asSequentialAccessBits(readerContext.reader().maxDoc(), ss); - matchingIterators.put(entry.getKey(), matchingBits); + Scorer scorer = ss.get(0L); + if (scorer != null) { + matchingIterators.put(entry.getKey(), scorer); + } } } } @Override - public void process(HitContext hitContext) { - List matches = new ArrayList<>(); + public void process(HitContext hitContext) throws IOException { + Map matches = new LinkedHashMap<>(); int doc = hitContext.docId(); - for (Map.Entry iterator : matchingIterators.entrySet()) { - if (iterator.getValue().get(doc)) { - matches.add(iterator.getKey()); + for (Map.Entry entry : matchingIterators.entrySet()) { + Scorer scorer = entry.getValue(); + if (scorer.iterator().docID() < doc) { + scorer.iterator().advance(doc); + } + if (scorer.iterator().docID() == doc) { + matches.put(entry.getKey(), scorer.score()); } } - hitContext.hit().matchedQueries(matches.toArray(new String[0])); + hitContext.hit().matchedQueries(matches); } }; } diff --git a/server/src/test/java/org/opensearch/common/io/stream/BytesStreamsTests.java b/server/src/test/java/org/opensearch/common/io/stream/BytesStreamsTests.java index 370c691daf401..1a5118ac57d9d 100644 --- a/server/src/test/java/org/opensearch/common/io/stream/BytesStreamsTests.java +++ b/server/src/test/java/org/opensearch/common/io/stream/BytesStreamsTests.java @@ -536,6 +536,40 @@ public void testWriteMap() throws IOException { assertThat(expected, equalTo(loaded)); } + public void testReadOrderedMapEmpty() throws IOException { + BytesStreamOutput out = new BytesStreamOutput(); + out.writeVInt(0); + + StreamInput in = StreamInput.wrap(BytesReference.toBytes(out.bytes())); + Map result = in.readOrderedMap(StreamInput::readInt, StreamInput::readString); + assertTrue("Map should be empty", result.isEmpty()); + + // If the map is empty, it's not necessarily a LinkedHashMap due to optimization + assertTrue("Resulting map should be immutable", Collections.emptyMap().getClass().isInstance(result)); + + in.close(); + out.close(); + } + + public void testReadOrderedMapNonEmpty() throws IOException { + BytesStreamOutput out = new BytesStreamOutput(); + out.writeVInt(2); + out.writeInt(1); + out.writeString("One"); + out.writeInt(2); + out.writeString("Two"); + + StreamInput in = StreamInput.wrap(BytesReference.toBytes(out.bytes())); + Map result = in.readOrderedMap(StreamInput::readInt, StreamInput::readString); + assertEquals("Map should have 2 elements", 2, result.size()); + assertTrue("Resulting map should be a LinkedHashMap", result instanceof LinkedHashMap); + assertEquals("First element should be 'One'", "One", result.get(1)); + assertEquals("Second element should be 'Two'", "Two", result.get(2)); + + in.close(); + out.close(); + } + public void testWriteMapOfLists() throws IOException { final int size = randomIntBetween(0, 5); final Map> expected = new HashMap<>(size); diff --git a/server/src/test/java/org/opensearch/search/SearchHitTests.java b/server/src/test/java/org/opensearch/search/SearchHitTests.java index 88d5fb38a6cb1..8b407152e5c63 100644 --- a/server/src/test/java/org/opensearch/search/SearchHitTests.java +++ b/server/src/test/java/org/opensearch/search/SearchHitTests.java @@ -61,6 +61,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Predicate; @@ -76,6 +77,25 @@ import static org.hamcrest.Matchers.nullValue; public class SearchHitTests extends AbstractWireSerializingTestCase { + + private Map getSampleMatchedQueries() { + Map matchedQueries = new LinkedHashMap<>(); + matchedQueries.put("query1", 1.0f); + matchedQueries.put("query2", 0.5f); + return matchedQueries; + } + + public static SearchHit createTestItemWithMatchedQueriesScores(boolean withOptionalInnerHits, boolean withShardTarget) { + var searchHit = createTestItem(randomFrom(XContentType.values()), withOptionalInnerHits, withShardTarget); + int size = randomIntBetween(1, 5); // Ensure at least one matched query + Map matchedQueries = new LinkedHashMap<>(size); + for (int i = 0; i < size; i++) { + matchedQueries.put(randomAlphaOfLength(5), randomFloat()); + } + searchHit.matchedQueries(matchedQueries); + return searchHit; + } + public static SearchHit createTestItem(boolean withOptionalInnerHits, boolean withShardTarget) { return createTestItem(randomFrom(XContentType.values()), withOptionalInnerHits, withShardTarget); } @@ -129,9 +149,9 @@ public static SearchHit createTestItem(final MediaType mediaType, boolean withOp } if (randomBoolean()) { int size = randomIntBetween(0, 5); - String[] matchedQueries = new String[size]; + Map matchedQueries = new LinkedHashMap<>(size); for (int i = 0; i < size; i++) { - matchedQueries[i] = randomAlphaOfLength(5); + matchedQueries.put(randomAlphaOfLength(5), Float.NaN); } hit.matchedQueries(matchedQueries); } @@ -219,6 +239,13 @@ public void testFromXContentLenientParsing() throws IOException { assertToXContentEquivalent(originalBytes, toXContent(parsed, xContentType, true), xContentType); } + public void testSerializationDeserializationWithMatchedQueriesScores() throws IOException { + SearchHit searchHit = createTestItemWithMatchedQueriesScores(true, true); + SearchHit deserializedSearchHit = copyWriteable(searchHit, getNamedWriteableRegistry(), SearchHit::new, Version.V_3_0_0); + assertEquals(searchHit, deserializedSearchHit); + assertEquals(searchHit.getMatchedQueriesAndScores(), deserializedSearchHit.getMatchedQueriesAndScores()); + } + /** * When e.g. with "stored_fields": "_none_", only "_index" and "_score" are returned. */ @@ -244,6 +271,125 @@ public void testToXContent() throws IOException { assertEquals("{\"_id\":\"id1\",\"_score\":1.5}", builder.toString()); } + public void testSerializeShardTargetWithNewVersion() throws Exception { + String clusterAlias = randomBoolean() ? null : "cluster_alias"; + SearchShardTarget target = new SearchShardTarget( + "_node_id", + new ShardId(new Index("_index", "_na_"), 0), + clusterAlias, + OriginalIndices.NONE + ); + + Map innerHits = new HashMap<>(); + SearchHit innerHit1 = new SearchHit(0, "_id", null, null); + innerHit1.shard(target); + SearchHit innerInnerHit2 = new SearchHit(0, "_id", null, null); + innerInnerHit2.shard(target); + innerHits.put("1", new SearchHits(new SearchHit[] { innerInnerHit2 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + innerHit1.setInnerHits(innerHits); + SearchHit innerHit2 = new SearchHit(0, "_id", null, null); + innerHit2.shard(target); + SearchHit innerHit3 = new SearchHit(0, "_id", null, null); + innerHit3.shard(target); + + innerHits = new HashMap<>(); + SearchHit hit1 = new SearchHit(0, "_id", null, null); + innerHits.put("1", new SearchHits(new SearchHit[] { innerHit1, innerHit2 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + innerHits.put("2", new SearchHits(new SearchHit[] { innerHit3 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + hit1.shard(target); + hit1.setInnerHits(innerHits); + + SearchHit hit2 = new SearchHit(0, "_id", null, null); + hit2.shard(target); + + SearchHits hits = new SearchHits(new SearchHit[] { hit1, hit2 }, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 1f); + + SearchHits results = copyWriteable(hits, getNamedWriteableRegistry(), SearchHits::new, Version.V_3_0_0); + SearchShardTarget deserializedTarget = results.getAt(0).getShard(); + assertThat(deserializedTarget, equalTo(target)); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(0).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(0).getInnerHits().get("1").getAt(0).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(1).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("2").getAt(0).getShard(), notNullValue()); + for (SearchHit hit : results) { + assertEquals(clusterAlias, hit.getClusterAlias()); + if (hit.getInnerHits() != null) { + for (SearchHits innerhits : hit.getInnerHits().values()) { + for (SearchHit innerHit : innerhits) { + assertEquals(clusterAlias, innerHit.getClusterAlias()); + } + } + } + } + assertThat(results.getAt(1).getShard(), equalTo(target)); + } + + public void testSerializeShardTargetWithNewVersionAndMatchedQueries() throws Exception { + String clusterAlias = randomBoolean() ? null : "cluster_alias"; + SearchShardTarget target = new SearchShardTarget( + "_node_id", + new ShardId(new Index("_index", "_na_"), 0), + clusterAlias, + OriginalIndices.NONE + ); + + Map innerHits = new HashMap<>(); + SearchHit innerHit1 = new SearchHit(0, "_id", null, null); + innerHit1.shard(target); + innerHit1.matchedQueries(getSampleMatchedQueries()); + SearchHit innerInnerHit2 = new SearchHit(0, "_id", null, null); + innerInnerHit2.shard(target); + innerHits.put("1", new SearchHits(new SearchHit[] { innerInnerHit2 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + innerHit1.setInnerHits(innerHits); + SearchHit innerHit2 = new SearchHit(0, "_id", null, null); + innerHit2.shard(target); + innerHit2.matchedQueries(getSampleMatchedQueries()); + SearchHit innerHit3 = new SearchHit(0, "_id", null, null); + innerHit3.shard(target); + innerHit3.matchedQueries(getSampleMatchedQueries()); + + innerHits = new HashMap<>(); + SearchHit hit1 = new SearchHit(0, "_id", null, null); + innerHits.put("1", new SearchHits(new SearchHit[] { innerHit1, innerHit2 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + innerHits.put("2", new SearchHits(new SearchHit[] { innerHit3 }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1f)); + hit1.shard(target); + hit1.setInnerHits(innerHits); + + SearchHit hit2 = new SearchHit(0, "_id", null, null); + hit2.shard(target); + + SearchHits hits = new SearchHits(new SearchHit[] { hit1, hit2 }, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 1f); + + SearchHits results = copyWriteable(hits, getNamedWriteableRegistry(), SearchHits::new, Version.V_3_0_0); + SearchShardTarget deserializedTarget = results.getAt(0).getShard(); + assertThat(deserializedTarget, equalTo(target)); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(0).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(0).getInnerHits().get("1").getAt(0).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("1").getAt(1).getShard(), notNullValue()); + assertThat(results.getAt(0).getInnerHits().get("2").getAt(0).getShard(), notNullValue()); + String[] expectedMatchedQueries = new String[] { "query1", "query2" }; + String[] actualMatchedQueries = results.getAt(0).getInnerHits().get("1").getAt(0).getMatchedQueries(); + assertArrayEquals(expectedMatchedQueries, actualMatchedQueries); + + Map expectedMatchedQueriesAndScores = new LinkedHashMap<>(); + expectedMatchedQueriesAndScores.put("query1", 1.0f); + expectedMatchedQueriesAndScores.put("query2", 0.5f); + + Map actualMatchedQueriesAndScores = results.getAt(0).getInnerHits().get("1").getAt(0).getMatchedQueriesAndScores(); + assertEquals(expectedMatchedQueriesAndScores, actualMatchedQueriesAndScores); + for (SearchHit hit : results) { + assertEquals(clusterAlias, hit.getClusterAlias()); + if (hit.getInnerHits() != null) { + for (SearchHits innerhits : hit.getInnerHits().values()) { + for (SearchHit innerHit : innerhits) { + assertEquals(clusterAlias, innerHit.getClusterAlias()); + } + } + } + } + assertThat(results.getAt(1).getShard(), equalTo(target)); + } + public void testSerializeShardTarget() throws Exception { String clusterAlias = randomBoolean() ? null : "cluster_alias"; SearchShardTarget target = new SearchShardTarget(